diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index cbd3bd7bec4..c52a58eac8a 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -683,3 +683,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index a513eb9216d..d69020d810e 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -169,3 +169,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aecf3799a5d..c677ec4bf5c 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4388,3 +4388,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved diff --git a/contrib/poco b/contrib/poco index 79923422618..0ab9bba7cca 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 799234226187c0ae0b8c90f23465b25ed7956e56 +Subproject commit 0ab9bba7ccad3c8dacce04a35cb3b78218547ab4 diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 2582b599d58..3458cf905da 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -5,6 +5,7 @@ set -x # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' +dmesg --clear ||: set -e set -u @@ -368,6 +369,7 @@ if [ -f core.zst ]; then fi rg --text -F '' server.log > fatal.log ||: +dmesg -T > dmesg.log ||: zstd --threads=0 server.log @@ -396,6 +398,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s fuzzer.log server.log.zst main.log + dmesg.log ${CORE_LINK}

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 2165045e565..ee4b5d7c156 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -128,6 +128,7 @@ function run_tests() if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then ADDITIONAL_OPTIONS+=('--report-coverage') + ADDITIONAL_OPTIONS+=('--report-logs-stats') fi set +e diff --git a/docker/test/stress/stress b/docker/test/stress/stress index cf92b86c18f..4afd2745526 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -289,6 +289,7 @@ if __name__ == "__main__": "--database=system", "--hung-check", "--stress", + "--report-logs-stats", "00001_select_1", ] ) diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index ac19794c167..939995a61c5 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -136,3 +136,7 @@ DESCRIBE TABLE test_database.test_table; │ data │ Nullable(String) │ └────────┴───────────────────┘ ``` + +## Related content + +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 7f9659400b8..b73d28c8508 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -175,3 +175,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) - [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) - [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) + +## Related content +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e28c486afca..d384ed639eb 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1203,12 +1203,14 @@ SELECT * FROM json_each_row_nested - [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. - [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. - [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. +- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. +- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. 
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. - [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. - [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. - [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. - [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 376b7480358..7a6b2340d29 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -266,7 +266,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4ffe2bbc7c4..9def33debbd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -402,40 +402,62 @@ Default value: `ALL`. ## join_algorithm {#settings-join_algorithm} -Specifies [JOIN](../../sql-reference/statements/select/join.md) algorithm. +Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used. Several algorithms can be specified, and an available one will be chosen for a particular query based on kind/strictness and table engine.
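For example, a minimal sketch of requesting a list of algorithms for a session (the tables `t1` and `t2` are illustrative; an available algorithm from the list is chosen per query):

```sql
-- Ask for grace_hash first; hash remains as a fallback for queries
-- whose kind/strictness grace_hash does not support.
SET join_algorithm = 'grace_hash,hash';

SELECT t1.id, t2.value
FROM t1
JOIN t2 ON t1.id = t2.id;
```

Here `grace_hash,hash` mirrors the `direct,hash` pattern already used by `default` below.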
Possible values: -- `default` — `hash` or `direct`, if possible (same as `direct,hash`) +### `default` -- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. +This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`). -- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process. +### `grace_hash` + +[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash is an algorithm option that provides performant complex joins while limiting memory use. + +The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of the key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way that ensures each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row is recalculated. Any rows that don’t belong to the current bucket are flushed and reassigned. + +### `hash` + +[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. + +### `parallel_hash` + +A variation of `hash` join that splits the data into buckets and concurrently builds several hash tables instead of one to speed up this process. When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. -- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. +### `partial_merge` + +A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). -When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. +When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. -- `direct` - can be applied when the right storage supports key-value requests. +### `direct` + +This algorithm can be applied when the storage for the right table supports key-value requests.
The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only for the `LEFT` and `INNER` JOINs. -- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated. +### `auto` -- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining. +When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated. -- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. +### `full_sorting_merge` + +[Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting of the joined tables before joining. + +### `prefer_partial_merge` + +ClickHouse always tries to use `partial_merge` join if possible; otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. ## join_any_take_last_row {#settings-join_any_take_last_row} -Changes behaviour of join operations with `ANY` strictness. +Changes the behaviour of join operations with `ANY` strictness. :::warning This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. ::: @@ -498,7 +520,7 @@ Default value: 65536. Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk. -The bigger the value of the setting, the more RAM used and the less disk I/O needed. +The bigger the value of the setting, the more RAM is used and the less disk I/O is needed. Possible values: @@ -514,12 +536,12 @@ Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. ::: -When the legacy behaviour enabled: +When the legacy behaviour is enabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. - Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. -When the legacy behaviour disabled: +When the legacy behaviour is disabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. - Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables. @@ -572,7 +594,7 @@ Default value: `163840`. ## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem} -The minimum number of lines to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. +The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from a remote filesystem.
Possible values: @@ -706,7 +728,7 @@ log_queries=1 ## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} -If enabled (non-zero), queries faster then the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: +If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: - `system.query_log` - `system.query_thread_log` @@ -741,7 +763,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has an effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. Possible values: @@ -760,7 +782,7 @@ log_query_threads=1 Setting up query views logging. -When a query run by ClickHouse with this setup on has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter. +When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged according to the rules in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter. Example: @@ -787,7 +809,7 @@ It can be used to improve the readability of server logs. Additionally, it helps Possible values: -- Any string no longer than [max_query_size](#settings-max_query_size). If length is exceeded, the server throws an exception. +- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception. Default value: empty string. @@ -821,11 +843,11 @@ The setting also does not have a purpose when using INSERT SELECT, since data is Default value: 1,048,576. -The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM. +The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity.
Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. ## min_insert_block_size_rows {#min-insert-block-size-rows} -Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. +Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -891,7 +913,7 @@ Higher values will lead to higher memory usage. ## max_compress_block_size {#max-compress-block-size} -The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. +The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to a slightly reduced compression ratio; the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. :::warning This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -935,7 +957,7 @@ Default value: 1000. ## interactive_delay {#interactive-delay} -The interval in microseconds for checking whether request execution has been cancelled and sending the progress. +The interval in microseconds for checking whether request execution has been canceled and sending the progress. Default value: 100,000 (checks for cancelling and sends the progress ten times per second). @@ -4122,7 +4144,20 @@ Enabled by default. Serialize named tuple columns as JSON objects. -Disabled by default. +Enabled by default. + +### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} + +Parse named tuple columns as JSON objects. + +Enabled by default. + +### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} + +Insert default values for missing elements in a JSON object while parsing a named tuple. +This setting works only when the setting `input_format_json_named_tuples_as_objects` is enabled. + +Enabled by default. ### output_format_json_array_of_rows {#output_format_json_array_of_rows} diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 963ddfe7a02..a4fa5579638 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -120,5 +120,6 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
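As a hedged sketch of the same workflow (the file name and its structure below are hypothetical), a query inside a `clickhouse-local` session can read a local file directly through the `file` table function:

```sql
-- Hypothetical local CSV: give file() the format and structure explicitly,
-- then query it like any other table.
SELECT count(*)
FROM file('data.csv', 'CSV', 'id UInt32, name String');
```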
## Related Content +- [Extracting, converting, and querying data in local files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) - [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) - [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data) diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 575141766dd..bd8e72e0fec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -57,6 +57,7 @@ ClickHouse-specific aggregate functions: - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) - [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) - [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) - [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) @@ -77,4 +78,6 @@ ClickHouse-specific aggregate functions: - [contingency](./contingency.md) - [cramersV](./cramersv.md) - [cramersVBiasCorrected](./cramersvbiascorrected.md) -- [theilsU](./theilsu.md) \ No newline at end of file +- [theilsU](./theilsu.md) +- [maxIntersections](./maxintersections.md) +- [maxIntersectionsPosition](./maxintersectionsposition.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md new file mode 100644 index 00000000000..db99b900a3e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md @@ -0,0 +1,64 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/maxintersections +sidebar_position: 360 +title: maxIntersections +--- + +# maxIntersections + +Aggregate function that calculates the maximum number of times that a group of intervals intersects each other (if all the intervals intersect at least once). + +The syntax is: + +```sql +maxIntersections(start_column, end_column) +``` + +**Arguments** + +- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped. + +- `end_column` – the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped. + +**Returned value** + +Returns the maximum number of intersected intervals. + +**Example** + +```sql +CREATE TABLE my_events ( + start UInt32, + end UInt32 +) +Engine = MergeTree +ORDER BY tuple(); + +INSERT INTO my_events VALUES + (1, 3), + (1, 6), + (2, 5), + (3, 7); +``` + +The intervals look like the following: + +```response +1 - 3 +1 - - - - 6 + 2 - - 5 + 3 - - - 7 +``` + +Three of these intervals have a common value (the value is `4`, but the value that is common is not important; we are measuring the count of the intersections). The intervals `(1,3)` and `(3,7)` share an endpoint but are not considered intersecting by the `maxIntersections` function.
+ +```sql +SELECT maxIntersections(start, end) FROM my_events; +``` + +Response: +```response +3 +``` + +If you have multiple occurrences of the maximum interval, you can use the [`maxIntersectionsPosition` function](./maxintersectionsposition.md) to locate the number and location of those occurrences. \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md new file mode 100644 index 00000000000..7dd63f09316 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md @@ -0,0 +1,64 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition +sidebar_position: 361 +title: maxIntersectionsPosition +--- + +# maxIntersectionsPosition + +Aggregate function that calculates the positions of the occurrences of the [`maxIntersections` function](./maxintersections.md). + +The syntax is: + +```sql +maxIntersectionsPosition(start_column, end_column) +``` + +**Arguments** + +- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped. + +- `end_column` – the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped. + +**Returned value** + +Returns the start positions of the maximum number of intersected intervals. + +**Example** + +```sql +CREATE TABLE my_events ( + start UInt32, + end UInt32 +) +Engine = MergeTree +ORDER BY tuple(); + +INSERT INTO my_events VALUES + (1, 3), + (1, 6), + (2, 5), + (3, 7); +``` + +The intervals look like the following: + +```response +1 - 3 +1 - - - - 6 + 2 - - 5 + 3 - - - 7 +``` + +Notice that three of these intervals have the value 4 in common, and that the overlap starts with the 2nd interval: + +```sql +SELECT maxIntersectionsPosition(start, end) FROM my_events; +``` + +Response: +```response +2 +``` + +In other words, the `(1,6)` row is the start of the 3 intervals that intersect, and 3 is the maximum number of intervals that intersect. \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md new file mode 100644 index 00000000000..07fcd187217 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -0,0 +1,68 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted +sidebar_position: 203 +--- + +# quantileInterpolatedWeighted + +Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using linear interpolation, taking into account the weight of each element. + +To get the interpolated value, all the passed values are combined into an array, which is then sorted by their corresponding weights. Quantile interpolation is then performed using the [weighted percentile method](https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method) by building a cumulative distribution based on the weights; a linear interpolation is then performed using the weights and the values to compute the quantiles. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could).
In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileInterpolatedWeighted(level)(expr, weight) +``` + +Alias: `medianInterpolatedWeighted`. + +**Arguments** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. + +**Returned value** + +- Quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Input table: + +``` text +┌─n─┬─val─┐ +│ 0 │ 3 │ +│ 1 │ 2 │ +│ 2 │ 1 │ +│ 5 │ 4 │ +└───┴─────┘ +``` + +Query: + +``` sql +SELECT quantileInterpolatedWeighted(n, val) FROM t +``` + +Result: + +``` text +┌─quantileInterpolatedWeighted(n, val)─┐ +│ 1 │ +└──────────────────────────────────────┘ +``` + +**See Also** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 5c9120fb8f4..57151915336 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -9,7 +9,7 @@ sidebar_position: 201 Syntax: `quantiles(level1, level2, …)(x)` -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. ## quantilesExactExclusive diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index ab1596b1760..d9099ba5ad3 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -6,6 +6,10 @@ sidebar_label: JSON # JSON +:::warning +This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead. +::: + Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`.
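A minimal sketch of the experimental type in action (the table name, the flag requirement, and the payload are illustrative and may change between releases):

```sql
-- The experimental Object('json') type must be enabled explicitly.
SET allow_experimental_object_type = 1;

CREATE TABLE json_demo (doc JSON) ENGINE = Memory;

INSERT INTO json_demo VALUES ('{"user": {"name": "alice", "visits": 3}}');

-- Nested values are addressed with dot notation.
SELECT doc.user.name, doc.user.visits FROM json_demo;
```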
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index c044b972754..9d2f89c1837 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -121,7 +121,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa ## range(end), range(\[start, \] end \[, step\]) -Returns an array of `UInt` numbers from `start` to `end - 1` by `step`. +Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md). **Syntax** ``` sql range([start, ] end [, step]) ``` **Arguments** -- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. [UInt](../data-types/int-uint.md) -- `end` — The number before which the array is constructed. Required. [UInt](../data-types/int-uint.md) -- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. [UInt](../data-types/int-uint.md) +- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. +- `end` — The number before which the array is constructed. Required. +- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. **Returned value** -- Array of `UInt` numbers from `start` to `end - 1` by `step`. +- Array of numbers from `start` to `end - 1` by `step`. **Implementation details** -- All arguments must be positive values: `start`, `end`, `step` are `UInt` data types, as well as elements of the returned array. +- All arguments `start`, `end`, and `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array have the supertype of all the argument types. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. 
- **Examples** Query: ``` sql -SELECT range(5), range(1, 5), range(1, 5, 2); +SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); ``` Result: ```txt -┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐ -│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ -└─────────────┴─────────────┴────────────────┘ +┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐ +│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │ +└─────────────┴─────────────┴────────────────┴─────────────────┘ ``` ## array(x1, …), operator \[x1, …\] diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index dd56b47cd3a..380c8364090 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -39,3 +39,16 @@ SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64( │ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │ └──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ ``` + +```sql +CREATE TABLE random (a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) engine=Memory; +INSERT INTO random SELECT * FROM generateRandom() LIMIT 2; +SELECT * FROM random; +``` + +```text +┌─a────────────────────────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ +│ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │ +│ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │ +└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 565304710cc..d150b94b8af 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -117,7 +117,7 @@ SELECT notEmpty([1,2]); ## range(end), range(\[start, \] end \[, step\]) {#range} -返回一个以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 +返回一个以`step`作为增量步长的从`start`到`end - 1`的整型数字数组,支持类型包括[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。 **语法** ``` sql range([start, ] end [, step]) ``` **参数** -- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0,类型为[UInt](../data-types/int-uint.md)。 -- `end` — 计数到`end`结束,但不包括`end`,必填项,类型为[UInt](../data-types/int-uint.md)。 -- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1,类型为[UInt](../data-types/int-uint.md)。 +- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0。 +- `end` — 计数到`end`结束,但不包括`end`,必填项。 +- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1。 **返回值** -- 以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 +- 以`step`作为增量步长的从`start`到`end - 1`的数字数组。 **注意事项** -- 所有参数必须是正值:`start`、`end`、`step`,类型均为`UInt`,结果数组的元素与此相同。 +- 所有参数`start`、`end`、`step`必须属于以下几种类型之一:[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。结果数组的元素数据类型为所有入参类型的最小超类,也必须属于以上几种类型之一。 - 如果查询结果的数组总长度超过[function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block)指定的元素数,将会抛出异常。 - **示例** 查询语句: ``` sql -SELECT range(5), range(1, 5), range(1, 5, 2); +SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); ``` 结果: ```txt -┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐ -│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ 
-└─────────────┴─────────────┴────────────────┘ +┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐ +│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │ +└─────────────┴─────────────┴────────────────┴─────────────────┘ ``` ## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 742838d6433..419b80ccff2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -140,6 +140,7 @@ namespace CurrentMetrics namespace ProfileEvents { extern const Event MainConfigLoads; + extern const Event ServerStartupMilliseconds; } namespace fs = std::filesystem; @@ -652,6 +653,8 @@ static void sanityChecks(Server & server) int Server::main(const std::vector & /*args*/) try { + Stopwatch startup_watch; + Poco::Logger * log = &logger(); UseSSL use_ssl; @@ -1822,6 +1825,9 @@ try LOG_INFO(log, "Ready for connections."); } + startup_watch.stop(); + ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds()); + try { global_context->startClusterDiscovery(); diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 366667410d5..f1f99fc9166 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -167,6 +167,7 @@ enum class AccessType M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \ + M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \ M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \ M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \ M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \ diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 02aafb7415b..e21ebda2a31 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -53,7 +53,7 @@ TEST(AccessRights, Union) "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE REPLICA, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); } diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index c559b3f115f..ac81f7466fa 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -207,7 +207,7 @@ private: { // Fuse points if their text representations differ only in last digit auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits::epsilon(); - if (points[left].mean + min_diff >= points[right].mean) + if (points[left].mean + std::fabs(min_diff) >= points[right].mean) { points[left] = points[left] + points[right]; } diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 6427d03f089..49157acf690 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -232,6 +232,9 @@ struct NameQuantilesExactInclusive { static constexpr auto name = "quantilesExac struct 
NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; }; struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; }; +struct NameQuantileInterpolatedWeighted { static constexpr auto name = "quantileInterpolatedWeighted"; }; +struct NameQuantilesInterpolatedWeighted { static constexpr auto name = "quantilesInterpolatedWeighted"; }; + struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; }; struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; }; struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; }; diff --git a/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp new file mode 100644 index 00000000000..68b42376df7 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +struct Settings; + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + + template using FuncQuantileInterpolatedWeighted = AggregateFunctionQuantile, NameQuantileInterpolatedWeighted, true, void, false>; + template using FuncQuantilesInterpolatedWeighted = AggregateFunctionQuantile, NameQuantilesInterpolatedWeighted, true, void, true>; + + template