diff --git a/.github/codecov.yml b/.github/codecov.yml index 9b75efc791d..f185c5e2dcc 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -1,5 +1,5 @@ codecov: - max_report_age: off + max_report_age: "off" strict_yaml_branch: "master" ignore: @@ -14,4 +14,4 @@ ignore: comment: false github_checks: - annotations: false \ No newline at end of file + annotations: false diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml index 50eaf45e2ef..1005c8f6c38 100644 --- a/.github/workflows/anchore-analysis.yml +++ b/.github/workflows/anchore-analysis.yml @@ -8,9 +8,9 @@ name: Docker Container Scan (clickhouse-server) -on: +"on": pull_request: - paths: + paths: - docker/server/Dockerfile - .github/workflows/anchore-analysis.yml schedule: @@ -20,20 +20,20 @@ jobs: Anchore-Build-Scan: runs-on: ubuntu-latest steps: - - name: Checkout the code - uses: actions/checkout@v2 - - name: Build the Docker image - run: | - cd docker/server - perl -pi -e 's|=\$version||g' Dockerfile - docker build . --file Dockerfile --tag localbuild/testimage:latest - - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled - uses: anchore/scan-action@v2 - id: scan - with: - image: "localbuild/testimage:latest" - acs-report-enable: true - - name: Upload Anchore Scan Report - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: ${{ steps.scan.outputs.sarif }} + - name: Checkout the code + uses: actions/checkout@v2 + - name: Build the Docker image + run: | + cd docker/server + perl -pi -e 's|=\$version||g' Dockerfile + docker build . --file Dockerfile --tag localbuild/testimage:latest + - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled + uses: anchore/scan-action@v2 + id: scan + with: + image: "localbuild/testimage:latest" + acs-report-enable: true + - name: Upload Anchore Scan Report + uses: github/codeql-action/upload-sarif@v1 + with: + sarif_file: ${{ steps.scan.outputs.sarif }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 633dd47a2d5..00000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,32 +0,0 @@ -# See the example here: https://github.com/github/codeql-action - -name: "CodeQL Scanning" - -on: - schedule: - - cron: '0 19 * * *' -jobs: - CodeQL-Build: - - runs-on: self-hosted - timeout-minutes: 1440 - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - fetch-depth: 2 - submodules: 'recursive' - - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - - with: - languages: cpp - - - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build - - run: cd build && CC=gcc-10 CXX=g++-10 cmake .. 
- - run: cd build && ninja - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index 1e9765dca9e..d33dbf0600d 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,9 @@ website/package-lock.json /prof *.iml + +# data store +/programs/server/data +/programs/server/metadata +/programs/server/store + diff --git a/.gitmodules b/.gitmodules index ecccf0633e2..7a2c5600e65 100644 --- a/.gitmodules +++ b/.gitmodules @@ -184,7 +184,7 @@ url = https://github.com/ClickHouse-Extras/krb5 [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl - url = https://github.com/cyrusimap/cyrus-sasl + url = https://github.com/ClickHouse-Extras/cyrus-sasl branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring @@ -220,4 +220,4 @@ url = https://github.com/ClickHouse-Extras/boringssl.git [submodule "contrib/NuRaft"] path = contrib/NuRaft - url = https://github.com/eBay/NuRaft.git + url = https://github.com/ClickHouse-Extras/NuRaft.git diff --git a/.potato.yml b/.potato.yml index 113bdacbdde..7cb87c58bd1 100644 --- a/.potato.yml +++ b/.potato.yml @@ -14,14 +14,14 @@ handlers: # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker. github:pullRequest:labeled: data: - # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues. - queue: CLICKHOUSEDOCS - # The issue title. - summary: '[Potato] Pull Request #{{pullRequest.number}}' - # The issue description. - description: > + # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues. + queue: CLICKHOUSEDOCS + # The issue title. + summary: '[Potato] Pull Request #{{pullRequest.number}}' + # The issue description. + description: > {{pullRequest.description}} Ссылка на Pull Request: {{pullRequest.webUrl}} - # The condition for creating the Yandex.Tracker issue. - condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length + # The condition for creating the Yandex.Tracker issue. + condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000000..fe161e71849 --- /dev/null +++ b/.yamllint @@ -0,0 +1,15 @@ +# vi: ft=yaml +extends: default + +rules: + indentation: + level: warning + indent-sequences: consistent + line-length: + # there are some bash -c "", so this is OK + max: 300 + level: warning + comments: + min-spaces-from-content: 1 + document-start: + present: false diff --git a/CHANGELOG.md b/CHANGELOG.md index b328dcf5c88..e2c777b3bcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,180 @@ +## ClickHouse release 21.2 + +### ClickHouse release v21.2.2.8-stable, 2021-02-07 + +#### Backward Incompatible Change + +* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)). +* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)). 
+* Disallow floating point column as partition key, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)). +* Excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. + +#### New Feature + +* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading of whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)). +* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)). +* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)). +* Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)). +* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specify sharding key. Close [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)). +* Add function `decodeXMLComponent` to decode characters for XML. Example: `SELECT decodeXMLComponent('Hello,"world"!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)). +* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)). +* Add information about used features (functions, table engines, etc) into system.query_log. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Function `formatDateTime` support the `%Q` modification to format date to quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)). +* Add three functions for map data type: 1. `mapContains(map, key)` to check weather map.keys include the second parameter key. 2. `mapKeys(map)` return all the keys in Array format 3. 
`mapValues(map)` return all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)). +* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)). +* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)). +* Add `SELECT ALL` syntax. closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)). + +#### Performance Improvement + +* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed while ago. More safe interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Aliases declared in `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use index now. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)). +* Add `optimize_alias_column_prediction` (on by default), that will: - Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; - Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; - Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)). +* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)). +* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)). +* Use a connection pool for S3 connections, controlled by the `s3_max_connections` settings. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)). +* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)). +* Support splitting `Filter` step of query plan into `Expression + Filter` pair. 
Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Improvement + +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)). +* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if very deeply nested data type is used in table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)). +* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)). +* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support constant result in function `multiIf`. 
[#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable function length/empty/notEmpty for datatype Map, which returns keys number in Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)). +* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)). +* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve MySQL compatibility by making more functions case insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)). +* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)). +* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)). +* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow change `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). +* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)). +* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)). +* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. 
[#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)). +* Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender). Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)). +* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)). +* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)). +* Allow CTE (Common Table Expressions) to be further aliased. Propagate CSE (Common Subexpressions Elimination) to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378) . This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235 . [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)). +* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)). +* In case of unexpected exceptions automatically restart background thread which is responsible for execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)). +* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix + +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. 
[#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). +* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). +* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Fix wrong deserialization of columns description. It makes INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)). +* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)). +* Do not mark file for distributed send as broken on EOF. 
[#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)). +* Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix issue in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix minor issue in JOIN: Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). +* Fixed rare crashes when server run out of memory. 
[#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894) Add a check to avoid exception when long column alias('table.column' style, usually auto-generated by BI tools like Looker) equals to long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)). +* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Fix data type convert issue for MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)). + + +#### Build/Testing/Packaging Improvement + +* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)). +* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). +* Fixed MemorySanitizer errors in cyrus-sasl and musl. 
[#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). +* Insuffiient arguments check in `positionCaseInsensitiveUTF8` function triggered address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). +* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. dicttoxml project is not active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). +* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if server is not attached to terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). +* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). +* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)). +* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + ## ClickHouse release 21.1 +### ClickHouse release v21.1.3.32-stable, 2021-02-03 + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). 
[#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). 
[#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). + + + ### ClickHouse release v21.1.2.15-stable 2021-01-18 #### Backward Incompatible Change diff --git a/README.md b/README.md index 8e114d5abe9..3329a98877f 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,8 @@ ClickHouse® is an open-source column-oriented database management system that a * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. -* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events -* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021. diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index b5d4be950b5..064787fb64e 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -7,6 +7,7 @@ #include #include + #define DATE_LUT_MAX (0xFFFFFFFFU - 86400) #define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400) /// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check. 
@@ -249,7 +250,7 @@ public: { DayNum index = findIndex(t); - if (unlikely(index == 0)) + if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM)) return t + offset_at_start_of_epoch; time_t res = t - lut[index].date; @@ -264,18 +265,43 @@ public: { DayNum index = findIndex(t); - /// If it is not 1970 year (findIndex found nothing appropriate), - /// than limit number of hours to avoid insane results like 1970-01-01 89:28:15 - if (unlikely(index == 0)) + /// If it is overflow case, + /// then limit number of hours to avoid insane results like 1970-01-01 89:28:15 + if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM)) return static_cast((t + offset_at_start_of_epoch) / 3600) % 24; - time_t res = t - lut[index].date; + time_t time = t - lut[index].date; - /// Data is cleaned to avoid possibility of underflow. - if (res >= lut[index].time_at_offset_change) + if (time >= lut[index].time_at_offset_change) + time += lut[index].amount_of_offset_change; + + unsigned res = time / 3600; + return res <= 23 ? res : 0; + } + + /** Calculating offset from UTC in seconds. + * which means Using the same literal time of "t" to get the corresponding timestamp in UTC, + * then subtract the former from the latter to get the offset result. + * The boundaries when meets DST(daylight saving time) change should be handled very carefully. + */ + inline time_t timezoneOffset(time_t t) const + { + DayNum index = findIndex(t); + + /// Calculate daylight saving offset first. + /// Because the "amount_of_offset_change" in LUT entry only exists in the change day, it's costly to scan it from the very begin. + /// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by get the whole difference between lut[].date, + /// and then, we can directly subtract multiple 86400s to get the real DST offsets for the leap seconds is not considered now. + time_t res = (lut[index].date - lut[0].date) % 86400; + /// As so far to know, the maximal DST offset couldn't be more than 2 hours, so after the modulo operation the remainder + /// will sits between [-offset --> 0 --> offset] which respectively corresponds to moving clock forward or backward. + res = res > 43200 ? (86400 - res) : (0 - res); + + /// Check if has a offset change during this day. Add the change when cross the line + if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change) res += lut[index].amount_of_offset_change; - return res / 3600; + return res + offset_at_start_of_epoch; } /** Only for time zones with/when offset from UTC is multiple of five minutes. @@ -289,12 +315,12 @@ public: * each minute, with added or subtracted leap second, spans exactly 60 unix timestamps. */ - inline unsigned toSecond(time_t t) const { return t % 60; } + inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; } inline unsigned toMinute(time_t t) const { if (offset_is_whole_number_of_hours_everytime) - return (t / 60) % 60; + return (UInt32(t) / 60) % 60; UInt32 date = find(t).date; return (UInt32(t) - date) / 60 % 60; @@ -530,9 +556,7 @@ public: } } - /* - * check and change mode to effective - */ + /// Check and change mode to effective. inline UInt8 check_week_mode(UInt8 mode) const { UInt8 week_format = (mode & 7); @@ -541,10 +565,9 @@ public: return week_format; } - /* - * Calc weekday from d - * Returns 0 for monday, 1 for tuesday ... - */ + /** Calculate weekday from d. + * Returns 0 for monday, 1 for tuesday... 
+ */ inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const { if (!sunday_first_day_of_week) @@ -553,7 +576,7 @@ public: return toDayOfWeek(DayNum(d + 1)) - 1; } - /* Calc days in one year. */ + /// Calculate days in one year. inline unsigned calc_days_in_year(UInt16 year) const { return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365); diff --git a/base/common/LocalDate.h b/base/common/LocalDate.h index a063d6e98a3..e5ebe877bc5 100644 --- a/base/common/LocalDate.h +++ b/base/common/LocalDate.h @@ -168,14 +168,6 @@ public: static_assert(sizeof(LocalDate) == 4); -inline std::ostream & operator<< (std::ostream & ostr, const LocalDate & date) -{ - return ostr << date.year() - << '-' << (date.month() / 10) << (date.month() % 10) - << '-' << (date.day() / 10) << (date.day() % 10); -} - - namespace std { inline string to_string(const LocalDate & date) diff --git a/base/common/LocalDateTime.h b/base/common/LocalDateTime.h index d19d862f2ca..0e237789bd1 100644 --- a/base/common/LocalDateTime.h +++ b/base/common/LocalDateTime.h @@ -169,20 +169,6 @@ public: static_assert(sizeof(LocalDateTime) == 8); -inline std::ostream & operator<< (std::ostream & ostr, const LocalDateTime & datetime) -{ - ostr << std::setfill('0') << std::setw(4) << datetime.year(); - - ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10) - << '-' << (datetime.day() / 10) << (datetime.day() % 10) - << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10) - << ':' << (datetime.minute() / 10) << (datetime.minute() % 10) - << ':' << (datetime.second() / 10) << (datetime.second() % 10); - - return ostr; -} - - namespace std { inline string to_string(const LocalDateTime & datetime) diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 28c7990c353..fcd1610e589 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -12,6 +12,8 @@ #include #include #include +#include + namespace { @@ -189,8 +191,8 @@ void ReplxxLineReader::openEditor() return; } - String editor = std::getenv("EDITOR"); - if (editor.empty()) + const char * editor = std::getenv("EDITOR"); + if (!editor || !*editor) editor = "vim"; replxx::Replxx::State state(rx.get_state()); @@ -204,7 +206,7 @@ void ReplxxLineReader::openEditor() if ((-1 == res || 0 == res) && errno != EINTR) { rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); - return; + break; } bytes_written += res; } @@ -215,7 +217,7 @@ void ReplxxLineReader::openEditor() return; } - if (0 == execute(editor + " " + filename)) + if (0 == execute(fmt::format("{} {}", editor, filename))) { try { diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h index 8df037a14af..a92fe56b9cb 100644 --- a/base/common/arithmeticOverflow.h +++ b/base/common/arithmeticOverflow.h @@ -1,9 +1,30 @@ #pragma once #include +#include + namespace common { + /// Multiply and ignore overflow. 
+ template + inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y) + { + return x * y; + } + + template + inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y) + { + return x + y; + } + + template + inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y) + { + return x - y; + } + template inline bool addOverflow(T x, T y, T & res) { @@ -33,14 +54,14 @@ namespace common { static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 max_int128 = maxInt128(); - res = x + y; + res = addIgnoreOverflow(x, y); return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); } template <> inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x + y; + res = addIgnoreOverflow(x, y); return (y > 0 && x > std::numeric_limits::max() - y) || (y < 0 && x < std::numeric_limits::min() - y); } @@ -48,7 +69,7 @@ namespace common template <> inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x + y; + res = addIgnoreOverflow(x, y); return x > std::numeric_limits::max() - y; } @@ -81,14 +102,14 @@ namespace common { static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 max_int128 = maxInt128(); - res = x - y; + res = subIgnoreOverflow(x, y); return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); } template <> inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x - y; + res = subIgnoreOverflow(x, y); return (y < 0 && x > std::numeric_limits::max() + y) || (y > 0 && x < std::numeric_limits::min() + y); } @@ -96,7 +117,7 @@ namespace common template <> inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x - y; + res = subIgnoreOverflow(x, y); return x < y; } @@ -127,33 +148,33 @@ namespace common template <> inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) { - res = static_cast(x) * static_cast(y); /// Avoid signed integer overflow. + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; unsigned __int128 a = (x > 0) ? x : -x; unsigned __int128 b = (y > 0) ? y : -y; - return (a * b) / b != a; + return mulIgnoreOverflow(a, b) / b != a; } template <> inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x * y; + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; wInt256 a = (x > 0) ? x : -x; wInt256 b = (y > 0) ? y : -y; - return (a * b) / b != a; + return mulIgnoreOverflow(a, b) / b != a; } template <> inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x * y; + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; - return (x * y) / y != x; + return res / y != x; } } diff --git a/base/common/defines.h b/base/common/defines.h index 39df4698b88..845a53179ef 100644 --- a/base/common/defines.h +++ b/base/common/defines.h @@ -84,10 +84,12 @@ # define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) # define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) # define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. # define NO_SANITIZE_UNDEFINED # define NO_SANITIZE_ADDRESS # define NO_SANITIZE_THREAD +# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE #endif /// A template function for suppressing warnings about unused variables or function results. 
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 4cf8a8d7ce9..83384038b7c 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -152,7 +152,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) if (sig != SIGTSTP) /// This signal is used for debugging. { /// The time that is usually enough for separate thread to print info into log. - sleepForSeconds(10); + sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace call_default_signal_handler(sig); } @@ -230,10 +230,10 @@ public: } else { - siginfo_t info; - ucontext_t context; + siginfo_t info{}; + ucontext_t context{}; StackTrace stack_trace(NoCapture{}); - UInt32 thread_num; + UInt32 thread_num{}; std::string query_id; DB::ThreadStatus * thread_ptr{}; @@ -311,7 +311,8 @@ private: if (stack_trace.getSize()) { /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE This still require memory allocations and mutex lock inside logger. BTW we can also print it to stderr using write syscalls. + /// NOTE: This still require memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. std::stringstream bare_stacktrace; bare_stacktrace << "Stack trace:"; @@ -324,7 +325,7 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); -#if defined(__linux__) +#if defined(OS_LINUX) /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. String calculated_binary_hash = getHashOfLoadedBinaryHex(); if (daemon.stored_binary_hash.empty()) @@ -415,7 +416,9 @@ static void sanitizerDeathCallback() else log_message = "Terminate called without an active exception"; - static const size_t buf_size = 1024; + /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe + /// And the buffer should not be too small because our exception messages can be large. + static constexpr size_t buf_size = PIPE_BUF; if (log_message.size() > buf_size - 16) log_message.resize(buf_size - 16); @@ -561,6 +564,7 @@ void debugIncreaseOOMScore() { DB::WriteBufferFromFile buf("/proc/self/oom_score_adj"); buf.write(new_score.c_str(), new_score.size()); + buf.close(); } catch (const Poco::Exception & e) { @@ -783,7 +787,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() /// Setup signal handlers. /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. - addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals); + addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals); addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); @@ -986,7 +990,7 @@ void BaseDaemon::setupWatchdog() if (errno == ECHILD) { logger().information("Child process no longer exists."); - _exit(status); + _exit(WEXITSTATUS(status)); } if (WIFEXITED(status)) @@ -1020,7 +1024,7 @@ void BaseDaemon::setupWatchdog() /// Automatic restart is not enabled but you can play with it. 
#if 1 - _exit(status); + _exit(WEXITSTATUS(status)); #else logger().information("Will restart."); if (argv0) diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 42d94629ae9..8b9d765cf2e 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -83,7 +83,7 @@ public: template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key, value, timestamp, custom_root_path); } @@ -91,7 +91,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key_vals, timestamp, custom_root_path); } @@ -99,7 +99,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::chrono::system_clock::time_point & current_time, const std::string & custom_root_path) { - auto writer = getGraphiteWriter(); + auto *writer = getGraphiteWriter(); if (writer) writer->write(key_vals, std::chrono::system_clock::to_time_t(current_time), custom_root_path); } diff --git a/base/glibc-compatibility/musl/sched_getcpu.c b/base/glibc-compatibility/musl/sched_getcpu.c index 57b8b416043..f290f01d153 100644 --- a/base/glibc-compatibility/musl/sched_getcpu.c +++ b/base/glibc-compatibility/musl/sched_getcpu.c @@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init; int sched_getcpu(void) { int r; - unsigned cpu; + unsigned cpu = 0; #ifdef VDSO_GETCPU_SYM getcpu_f f = (getcpu_f)vdso_func; diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index b410c38cfad..849c58a8527 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -3,7 +3,6 @@ add_library (mysqlxx Exception.cpp Query.cpp ResultBase.cpp - StoreQueryResult.cpp UseQueryResult.cpp Row.cpp Value.cpp diff --git a/base/mysqlxx/Connection.cpp b/base/mysqlxx/Connection.cpp index 55757008562..8a15115cb06 100644 --- a/base/mysqlxx/Connection.cpp +++ b/base/mysqlxx/Connection.cpp @@ -116,8 +116,8 @@ void Connection::connect(const char* db, if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag)) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); - /// Sets UTF-8 as default encoding. - if (mysql_set_character_set(driver.get(), "UTF8")) + /// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/ + if (mysql_set_character_set(driver.get(), "utf8mb4")) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); is_connected = true; diff --git a/base/mysqlxx/Connection.h b/base/mysqlxx/Connection.h index 0e5a608108c..ca67db0e0c6 100644 --- a/base/mysqlxx/Connection.h +++ b/base/mysqlxx/Connection.h @@ -39,7 +39,6 @@ private: /** MySQL connection. 
* Usage: * mysqlxx::Connection connection("Test", "127.0.0.1", "root", "qwerty", 3306); - * std::cout << connection.query("SELECT 'Hello, World!'").store().at(0).at(0).getString() << std::endl; * * Or with Poco library configuration: * mysqlxx::Connection connection("mysql_params"); diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index ab9bb174d4a..f3485c54edc 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -71,16 +71,6 @@ UseQueryResult Query::use() return UseQueryResult(res, conn, this); } -StoreQueryResult Query::store() -{ - executeImpl(); - MYSQL_RES * res = mysql_store_result(conn->getDriver()); - if (!res) - checkError(conn->getDriver()); - - return StoreQueryResult(res, conn, this); -} - void Query::execute() { executeImpl(); diff --git a/base/mysqlxx/Query.h b/base/mysqlxx/Query.h index 1d3ab9678d5..036e8952bc3 100644 --- a/base/mysqlxx/Query.h +++ b/base/mysqlxx/Query.h @@ -3,7 +3,6 @@ #include #include -#include namespace mysqlxx @@ -46,11 +45,6 @@ public: */ UseQueryResult use(); - /** Выполнить запрос с загрузкой на клиента всех строк. - * Требуется оперативка, чтобы вместить весь результат, зато к строкам можно обращаться в произвольном порядке. - */ - StoreQueryResult store(); - /// Значение auto increment после последнего INSERT-а. UInt64 insertID(); diff --git a/base/mysqlxx/ResultBase.h b/base/mysqlxx/ResultBase.h index 4f2ab2eb0a2..d08922a269c 100644 --- a/base/mysqlxx/ResultBase.h +++ b/base/mysqlxx/ResultBase.h @@ -9,7 +9,7 @@ class Connection; class Query; -/** Базовый класс для UseQueryResult и StoreQueryResult. +/** Базовый класс для UseQueryResult. * Содержит общую часть реализации, * Ссылается на Connection. Если уничтожить Connection, то пользоваться ResultBase и любым результатом нельзя. * Использовать объект можно только для результата одного запроса! diff --git a/base/mysqlxx/Row.h b/base/mysqlxx/Row.h index a0b88638546..d668fdbd29a 100644 --- a/base/mysqlxx/Row.h +++ b/base/mysqlxx/Row.h @@ -35,7 +35,7 @@ public: { } - /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult или StoreQueryResult. */ + /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult. */ Row(MYSQL_ROW row_, ResultBase * res_, MYSQL_LENGTHS lengths_) : row(row_), res(res_), lengths(lengths_) { diff --git a/base/mysqlxx/StoreQueryResult.cpp b/base/mysqlxx/StoreQueryResult.cpp deleted file mode 100644 index 620ed8def56..00000000000 --- a/base/mysqlxx/StoreQueryResult.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#if __has_include() -#include -#else -#include -#endif - -#include -#include - - -namespace mysqlxx -{ - -StoreQueryResult::StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_) : ResultBase(res_, conn_, query_) -{ - UInt64 rows = mysql_num_rows(res); - reserve(rows); - lengths.resize(rows * num_fields); - - for (UInt64 i = 0; MYSQL_ROW row = mysql_fetch_row(res); ++i) - { - MYSQL_LENGTHS lengths_for_row = mysql_fetch_lengths(res); - memcpy(&lengths[i * num_fields], lengths_for_row, sizeof(lengths[0]) * num_fields); - - push_back(Row(row, this, &lengths[i * num_fields])); - } - checkError(conn->getDriver()); -} - -} diff --git a/base/mysqlxx/StoreQueryResult.h b/base/mysqlxx/StoreQueryResult.h deleted file mode 100644 index 9c242d2782f..00000000000 --- a/base/mysqlxx/StoreQueryResult.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include - -#include -#include - - -namespace mysqlxx -{ - -class Connection; - - -/** Результат выполнения запроса, загруженный полностью на клиента. 
- * Это требует оперативку, чтобы вместить весь результат, - * но зато реализует произвольный доступ к строкам по индексу. - * Если размер результата большой - используйте лучше UseQueryResult. - * Объект содержит ссылку на Connection. - * Если уничтожить Connection, то объект становится некорректным и все строки результата - тоже. - * Если задать следующий запрос в соединении, то объект и все строки тоже становятся некорректными. - * Использовать объект можно только для результата одного запроса! - * (При попытке присвоить объекту результат следующего запроса - UB.) - */ -class StoreQueryResult : public std::vector, public ResultBase -{ -public: - StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_); - - size_t num_rows() const { return size(); } - -private: - - /** Не смотря на то, что весь результат выполнения запроса загружается на клиента, - * и все указатели MYSQL_ROW на отдельные строки различные, - * при этом функция mysql_fetch_lengths() возвращает длины - * для текущей строки по одному и тому же адресу. - * То есть, чтобы можно было пользоваться несколькими Row одновременно, - * необходимо заранее куда-то сложить все длины. - */ - using Lengths = std::vector; - Lengths lengths; -}; - -} diff --git a/base/mysqlxx/UseQueryResult.h b/base/mysqlxx/UseQueryResult.h index 3a641020dcf..37cbbd19669 100644 --- a/base/mysqlxx/UseQueryResult.h +++ b/base/mysqlxx/UseQueryResult.h @@ -12,8 +12,7 @@ class Connection; /** Результат выполнения запроса, предназначенный для чтения строк, одна за другой. * В памяти при этом хранится только одна, текущая строка. - * В отличие от StoreQueryResult, произвольный доступ к строкам невозможен, - * а также, при чтении следующей строки, предыдущая становится некорректной. + * При чтении следующей строки, предыдущая становится некорректной. * Вы обязаны прочитать все строки из результата * (вызывать функцию fetch(), пока она не вернёт значение, преобразующееся к false), * иначе при следующем запросе будет выкинуто исключение с текстом "Commands out of sync". diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h index dfa86e8aa7d..57cfd452045 100644 --- a/base/mysqlxx/Value.h +++ b/base/mysqlxx/Value.h @@ -25,7 +25,7 @@ class ResultBase; /** Represents a single value read from MySQL. * It doesn't owns the value. It's just a wrapper of a pair (const char *, size_t). - * If the UseQueryResult/StoreQueryResult or Connection is destroyed, + * If the UseQueryResult or Connection is destroyed, * or you have read the next Row while using UseQueryResult, then the object is invalidated. * Allows to transform (parse) the value to various data types: * - with getUInt(), getString(), ... 
(recommended); diff --git a/base/mysqlxx/tests/mysqlxx_test.cpp b/base/mysqlxx/tests/mysqlxx_test.cpp index cf304a5cb5f..c505d34a58d 100644 --- a/base/mysqlxx/tests/mysqlxx_test.cpp +++ b/base/mysqlxx/tests/mysqlxx_test.cpp @@ -38,15 +38,6 @@ int main(int, char **) } } - { - mysqlxx::Query query = connection.query(); - query << "SELECT 1234567890 abc, 12345.67890 def UNION ALL SELECT 9876543210, 98765.43210"; - mysqlxx::StoreQueryResult result = query.store(); - - std::cerr << result.at(0)["abc"].getUInt() << ", " << result.at(0)["def"].getDouble() << std::endl - << result.at(1)["abc"].getUInt() << ", " << result.at(1)["def"].getDouble() << std::endl; - } - { mysqlxx::UseQueryResult result = connection.query("SELECT 'abc\\\\def' x").use(); mysqlxx::Row row = result.fetch(); @@ -54,27 +45,6 @@ int main(int, char **) std::cerr << row << std::endl; } - { - mysqlxx::Query query = connection.query("SEL"); - query << "ECT 1"; - - std::cerr << query.store().at(0).at(0) << std::endl; - } - - { - /// Копирование Query - mysqlxx::Query query = connection.query("SELECT 'Ok' x"); - using Queries = std::vector; - Queries queries; - queries.push_back(query); - - for (auto & q : queries) - { - std::cerr << q.str() << std::endl; - std::cerr << q.store().at(0) << std::endl; - } - } - { /// Копирование Query mysqlxx::Query query1 = connection.query("SELECT"); @@ -84,62 +54,6 @@ int main(int, char **) std::cerr << query1.str() << ", " << query2.str() << std::endl; } - { - /// Копирование Query - using Queries = std::list; - Queries queries; - queries.push_back(connection.query("SELECT")); - mysqlxx::Query & qref = queries.back(); - qref << " 1"; - - for (auto & query : queries) - { - std::cerr << query.str() << std::endl; - std::cerr << query.store().at(0) << std::endl; - } - } - - { - /// Транзакции - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - - trans.rollback(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Транзакции - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - { - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Транзакции - mysqlxx::Connection connection2("test", "127.0.0.1", "root", "qwerty", 3306); - connection2.query("DROP TABLE IF EXISTS tmp").execute(); - connection2.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection2); - connection2.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection2.query("SELECT * FROM tmp").store().size() << std::endl; - } - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - { /// NULL mysqlxx::Null x = mysqlxx::null; @@ -152,59 +66,6 @@ int main(int, char **) std::cerr << (x == 1 ? "Ok" : "Fail") << std::endl; std::cerr << (x.isNull() ? 
"Fail" : "Ok") << std::endl; } - - { - /// Исключения при попытке достать значение не того типа - try - { - connection.query("SELECT -1").store().at(0).at(0).getUInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 'xxx'").store().at(0).at(0).getInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT NULL").store().at(0).at(0).getString(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 123").store().at(0).at(0).getDate(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT '2011-01-01'").store().at(0).at(0).getDateTime(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - } } catch (const mysqlxx::Exception & e) { diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 5d643cc4bee..ce92ae203ea 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54447) +SET(VERSION_REVISION 54448) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 2) +SET(VERSION_MINOR 3) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7) -SET(VERSION_DESCRIBE v21.2.1.1-prestable) -SET(VERSION_STRING 21.2.1.1) +SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc) +SET(VERSION_DESCRIBE v21.3.1.1-prestable) +SET(VERSION_STRING 21.3.1.1) # end of autochange diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index d8e9cf9588d..d9ccd1a9ac6 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) # # - 4.0+ ccache always includes this environment variable into the hash # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness. + # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ # # So for: - # - 4.2+ time_macros sloppiness is used, + # - 4.2+ does not require any sloppiness # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable. 
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2") - message(STATUS "Use time_macros sloppiness for ccache") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros") - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros") + message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required") elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake index d31fe9c1de8..7fa5251946e 100644 --- a/cmake/find/nuraft.cmake +++ b/cmake/find/nuraft.cmake @@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt") return() endif () -if (NOT OS_FREEBSD) +if (NOT OS_FREEBSD AND NOT OS_DARWIN) set (USE_NURAFT 1) set (NURAFT_LIBRARY nuraft) @@ -20,5 +20,5 @@ if (NOT OS_FREEBSD) message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}") else() set (USE_NURAFT 0) - message (STATUS "Using internal NuRaft library on FreeBSD is not supported") + message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported") endif() diff --git a/contrib/NuRaft b/contrib/NuRaft index 410bd149da8..7adf7ae33e7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1 +Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793 diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 63b4e324d29..a295ee45b84 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -11,7 +11,7 @@ endif () target_compile_options(base64_scalar PRIVATE -falign-loops) if (ARCH_AMD64) - target_compile_options(base64_ssse3 PRIVATE -mssse3 -falign-loops) + target_compile_options(base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) target_compile_options(base64_avx PRIVATE -falign-loops -mavx) target_compile_options(base64_avx2 PRIVATE -falign-loops -mavx2) else () diff --git a/contrib/boost b/contrib/boost index 8e259cd2a6b..48f40ebb539 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1 +Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79 diff --git a/contrib/hyperscan b/contrib/hyperscan index 3907fd00ee8..e9f08df0213 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531 +Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index c44214cded8..75c45ff7bf5 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -252,6 +252,7 @@ if (NOT EXTERNAL_HYPERSCAN_LIBRARY_FOUND) target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) target_compile_options (hyperscan PRIVATE -g0 # Library has too much debug information + -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system -fno-sanitize=undefined # Assume the library takes care of itself ) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index e5bb7f7d11b..83137fe73bf 100644 --- 
a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -30,7 +30,12 @@ set(SRCS add_library(nuraft ${SRCS}) -target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) + +if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY) + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1) +else() + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) +endif() target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft) # for some reason include "asio.h" directly without "boost/" prefix. diff --git a/contrib/poco b/contrib/poco index 2c32e17c7df..fbaaba4a02e 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823 +Subproject commit fbaaba4a02e29987b8c584747a496c79528f125f diff --git a/debian/changelog b/debian/changelog index 1cec020f026..53b36cae114 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.2.1.1) unstable; urgency=low +clickhouse (21.3.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 11 Jan 2021 11:12:08 +0300 + -- clickhouse-release Mon, 01 Feb 2021 12:50:53 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 5022687c47b..43921a4d3c4 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3528ae68ef6..8e39af5646c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* ARG gosu_ver=1.10 # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/README.md b/docker/server/README.md index d8e9204dffa..6f799d68185 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` -### Volumes +### Volumes Typically you may want to mount the following folders inside your container to archieve persistency: @@ -76,7 +76,7 @@ You may also want to mount: * `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). -### Linux capabilities +### Linux capabilities ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). @@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv ### How to create default database and user on starting -Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create user (user named `default` is used by default) and database on image starting. 
You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: ``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server +$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server ``` ## How to extend this image diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index 0142149b5bd..329888f2fcb 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -54,8 +54,10 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAIN docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" +docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull rm -rf "$CONTAINER_ROOT_FOLDER" diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 8a4d02a6014..0138a165505 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_ CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" +CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" for dir in "$DATA_DIR" \ "$ERROR_LOG_DIR" \ @@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL ${CLICKHOUSE_PASSWORD} default + ${CLICKHOUSE_ACCESS_MANAGEMENT} @@ -120,7 +122,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then sleep 1 done - clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) + clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) echo diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index df918928f99..f151ae8fddf 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 03b7b2fc53a..64be52d8e30 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -47,6 +47,7 @@ RUN apt-get update \ expect \ fakeroot \ git \ + gdb \ gperf \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 7555b5591d0..b6fcdd7f7d2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -107,6 +107,18 @@ 
function start_server fi echo "ClickHouse server pid '$server_pid' started and responded" + + echo " +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +continue +" > script.gdb + + gdb -batch -command script.gdb -p "$server_pid" & } function clone_root @@ -120,7 +132,7 @@ function clone_root git checkout FETCH_HEAD echo 'Clonned merge head' else - git fetch + git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head" git checkout "$COMMIT_SHA" echo 'Checked out to commit' fi @@ -163,6 +175,7 @@ function clone_submodules contrib/xz contrib/dragonbox contrib/fast_float + contrib/NuRaft ) git submodule sync @@ -182,6 +195,7 @@ function run_cmake "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" + "-DENABLE_NURAFT=1" ) # TODO remove this? we don't use ccache anyway. An option would be to download it @@ -251,8 +265,13 @@ function run_tests 00701_rollup 00834_cancel_http_readonly_queries_on_client_close 00911_tautological_compare + + # Hyperscan 00926_multimatch 00929_multi_match_edit_distance + 01681_hyperscan_debug_assertion + + 01176_mysql_client_interactive # requires mysql client 01031_mutations_interpreter_and_context 01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled 01083_expressions_in_engine_arguments @@ -315,11 +334,12 @@ function run_tests # In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default 01504_rocksdb + 01686_rocksdb # Look at DistributedFilesToInsert, so cannot run in parallel. 01460_DistributedFilesToInsert - 01541_max_memory_usage_for_user + 01541_max_memory_usage_for_user_long # Require python libraries like scipy, pandas and numpy 01322_ttest_scipy @@ -335,9 +355,10 @@ function run_tests # JSON functions 01666_blns + 01674_htm_xml_coarse_parse ) - time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" + (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" # substr is to remove semicolon after test name readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b036f99e91d..766fec76179 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -21,13 +21,16 @@ function clone git init git remote add origin https://github.com/ClickHouse/ClickHouse - git fetch --depth=100 origin "$SHA_TO_TEST" - git fetch --depth=100 origin master # Used to obtain the list of modified or added tests + + # Network is unreliable. GitHub neither. 
+ for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done + # Used to obtain the list of modified or added tests + for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done # If not master, try to fetch pull/.../{head,merge} if [ "$PR_TO_TEST" != "0" ] then - git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" + for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done fi git checkout "$SHA_TO_TEST" @@ -187,7 +190,7 @@ case "$stage" in # Lost connection to the server. This probably means that the server died # with abort. echo "failure" > status.txt - if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt + if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 9b51891ccf5..e0e5e36a3d6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -58,10 +58,11 @@ RUN dockerd --version; docker --version RUN python3 -m pip install \ PyMySQL \ - aerospike \ + aerospike==4.0.0 \ avro \ cassandra-driver \ - confluent-kafka \ + confluent-kafka==1.5.0 \ + dict2xml \ dicttoxml \ docker \ docker-compose==1.22.0 \ diff --git a/docker/test/integration/runner/compose/docker_compose_cassandra.yml b/docker/test/integration/runner/compose/docker_compose_cassandra.yml index 6567a352027..c5cdfac5ce7 100644 --- a/docker/test/integration/runner/compose/docker_compose_cassandra.yml +++ b/docker/test/integration/runner/compose/docker_compose_cassandra.yml @@ -4,4 +4,4 @@ services: image: cassandra restart: always ports: - - 9043:9042 + - 9043:9042 diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index b8cd7f64273..43dd1aa43d3 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -5,6 +5,6 @@ services: hostname: hdfs1 restart: always ports: - - 50075:50075 - - 50070:50070 + - 50075:50075 + - 50070:50070 entrypoint: /etc/bootstrap.sh -d diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 219d977ffd9..b77542f7e11 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -5,42 +5,42 @@ services: image: zookeeper:3.4.9 hostname: kafka_zookeeper environment: - ZOO_MY_ID: 1 - ZOO_PORT: 2181 - ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 + ZOO_MY_ID: 1 + ZOO_PORT: 2181 + ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 security_opt: - - label:disable + - label:disable kafka1: image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - - "9092:9092" + - "9092:9092" environment: - KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092 - KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092 - 
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT - KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092 + KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - - kafka_zookeeper + - kafka_zookeeper security_opt: - - label:disable + - label:disable schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - "8081:8081" + - "8081:8081" environment: - SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 depends_on: - - kafka_zookeeper - - kafka1 + - kafka_zookeeper + - kafka1 security_opt: - - label:disable + - label:disable diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index a74476613f3..e2e15975e22 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -8,22 +8,22 @@ services: hostname: kerberizedhdfs1 restart: always volumes: - - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro - - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf - - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro + - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro + - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf + - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro ports: - 1006:1006 - 50070:50070 - - 9000:9000 + - 9010:9010 depends_on: - - hdfskerberos + - hdfskerberos entrypoint: /etc/bootstrap.sh -d hdfskerberos: image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG} hostname: hdfskerberos volumes: - - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab - - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - - /dev/urandom:/dev/random + - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab + - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random ports: [88, 749] diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 6e1e11344bb..64a3ef3e956 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -6,54 +6,54 @@ services: # restart: always hostname: kafka_kerberized_zookeeper environment: - ZOOKEEPER_SERVER_ID: 1 - ZOOKEEPER_CLIENT_PORT: 2181 - ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888" - 
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true" + ZOOKEEPER_SERVER_ID: 1 + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888" + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true" volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random depends_on: - - kafka_kerberos + - kafka_kerberos security_opt: - - label:disable + - label:disable kerberized_kafka1: image: confluentinc/cp-kafka:5.2.0 # restart: always hostname: kerberized_kafka1 ports: - - "9092:9092" - - "9093:9093" + - "9092:9092" + - "9093:9093" environment: - KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093 - KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093 - # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092 - # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092 - KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI - KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI - KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT, - KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true" + KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093 + KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093 + # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092 + # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092 + KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI + KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI + KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT, + KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true" volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random depends_on: - - 
kafka_kerberized_zookeeper - - kafka_kerberos + - kafka_kerberized_zookeeper + - kafka_kerberos security_opt: - - label:disable + - label:disable kafka_kerberos: image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kafka_kerberos volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab - - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab + - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random ports: [88, 749] diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index 8c54544ed88..6c98fde2303 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -7,5 +7,5 @@ services: MONGO_INITDB_ROOT_USERNAME: root MONGO_INITDB_ROOT_PASSWORD: clickhouse ports: - - 27018:27017 + - 27018:27017 command: --profile=2 --verbose diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 90daf8a4238..5b15d517f37 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -6,5 +6,5 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 3308:3306 + - 3308:3306 command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml index e7d762203ee..5aa13ba91c7 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml @@ -6,5 +6,9 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 3308:3306 - command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency + - 3308:3306 + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" + --enforce-gtid-consistency + --log-error-verbosity=3 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml index 918a2b5f80f..7c8a930c84e 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml @@ -6,5 +6,10 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 33308:3306 - command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency + - 33308:3306 + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' + --gtid-mode="ON" + --enforce-gtid-consistency + --log-error-verbosity=3 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml index 
802151c4d7b..5e4565d64c3 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml @@ -7,7 +7,7 @@ services: MYSQL_ALLOW_EMPTY_PASSWORD: 1 command: --federated --socket /var/run/mysqld/mysqld.sock healthcheck: - test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] interval: 1s timeout: 2s retries: 100 diff --git a/docker/test/integration/runner/compose/docker_compose_postgesql.yml b/docker/test/integration/runner/compose/docker_compose_postgesql.yml index 984f5f97384..90764188ddd 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgesql.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgesql.yml @@ -11,4 +11,4 @@ services: ports: - "5433:5433" environment: - POSTGRES_HOST_AUTH_METHOD: "trust" \ No newline at end of file + POSTGRES_HOST_AUTH_METHOD: "trust" diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index fff4fb1fa42..5657352e1b3 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -6,8 +6,8 @@ services: environment: POSTGRES_PASSWORD: mysecretpassword ports: - - 5432:5432 + - 5432:5432 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local diff --git a/docker/test/integration/runner/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml index 72df99ec59b..3d834aadaa4 100644 --- a/docker/test/integration/runner/compose/docker_compose_redis.yml +++ b/docker/test/integration/runner/compose/docker_compose_redis.yml @@ -4,5 +4,5 @@ services: image: redis restart: always ports: - - 6380:6379 + - 6380:6379 command: redis-server --requirepass "clickhouse" --databases 32 diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 9a0d8093a55..2b19a5e75a8 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -97,6 +97,7 @@ function configure rm -r right/db ||: rm -r db0/preprocessed_configs ||: rm -r db0/{data,metadata}/system ||: + rm db0/status ||: cp -al db0/ left/db/ cp -al db0/ right/db/ } diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 48479161ef9..f1c5df146aa 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') +parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. 
The script finishes with error if this time is exceeded.') parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') @@ -323,7 +324,7 @@ for query_index in queries_to_run: server_seconds += elapsed print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') - if elapsed > 10: + if elapsed > args.max_query_seconds: # Stop processing pathologically slow queries, to avoid timing out # the entire test task. This shouldn't really happen, so we don't # need much handling for this case and can just exit. diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index f2fcefd604f..7779f0e9dc2 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -60,4 +60,8 @@ fi # more idiologically correct. read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--replicated-database') +fi + clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index b063f8d81f6..2437415d17c 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,6 +3,9 @@ FROM yandex/clickhouse-test-base ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5 + RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ @@ -13,6 +16,7 @@ RUN apt-get update -y \ ncdu \ netcat-openbsd \ openssl \ + protobuf-compiler \ python3 \ python3-lxml \ python3-requests \ @@ -23,7 +27,8 @@ RUN apt-get update -y \ telnet \ tree \ unixodbc \ - wget + wget \ + mysql-client=5.7* RUN pip3 install numpy scipy pandas diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index d9a03f84726..d078f3739fd 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -53,14 +53,19 @@ function run_tests() if [ "$NUM_TRIES" -gt "1" ]; then ADDITIONAL_OPTIONS+=('--skip') ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip') + ADDITIONAL_OPTIONS+=('--jobs') + ADDITIONAL_OPTIONS+=('4') fi - for _ in $(seq 1 "$NUM_TRIES"); do - clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt - if [ "${PIPESTATUS[0]}" -ne "0" ]; then - break; - fi - done + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--replicated-database') + fi + + clickhouse-test --testname --shard --zookeeper --hung-check --print-time \ + --test-runs "$NUM_TRIES" \ + "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a test_output/test_result.txt } export -f run_tests diff --git a/docker/test/stateless_pytest/Dockerfile 
b/docker/test/stateless_pytest/Dockerfile index 4d0274143d6..58846f90fa7 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -5,7 +5,10 @@ RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ python3-pip \ python3-setuptools \ - python3-wheel + python3-wheel \ + brotli \ + netcat-openbsd \ + zstd RUN python3 -m pip install \ wheel \ @@ -15,7 +18,10 @@ RUN python3 -m pip install \ pytest-randomly \ pytest-rerunfailures \ pytest-timeout \ - pytest-xdist + pytest-xdist \ + pandas \ + numpy \ + scipy CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 9da2f3d3ada..dc1e4db4477 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -8,16 +8,23 @@ dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb dpkg -i package_folder/clickhouse-test_*.deb +function configure() +{ + # install test configs + /usr/share/clickhouse-test/config/install.sh + + # for clickhouse-server (via service) + echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment + # for clickhouse-client + export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' + + # since we run clickhouse from root + sudo chown root: /var/lib/clickhouse +} + function stop() { - timeout 120 service clickhouse-server stop - - # Wait for process to disappear from processlist and also try to kill zombies. - while kill -9 "$(pidof clickhouse-server)" - do - echo "Killed clickhouse-server" - sleep 0.5 - done + clickhouse stop } function start() @@ -33,19 +40,26 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout 120 service clickhouse-server start + # use root to match with current uid + clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log sleep 0.5 counter=$((counter + 1)) done + + echo " +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +continue +" > script.gdb + + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & } -# install test configs -/usr/share/clickhouse-test/config/install.sh - -# for clickhouse-server (via service) -echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment -# for clickhouse-client -export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' +configure start @@ -64,7 +78,7 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "SHOW TABLES FROM test" -./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" +./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt stop start diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 458f78fcdb4..841556cf090 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -1,8 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- from multiprocessing import cpu_count -from subprocess import Popen, check_call +from 
subprocess import Popen, call, STDOUT import os +import sys import shutil import argparse import logging @@ -22,12 +23,15 @@ def get_options(i): if 0 < i: options += " --order=random" - if i % 2 == 1: + if i % 3 == 1: options += " --db-engine=Ordinary" + if i % 3 == 2: + options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) + # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. - if i % 3 == 1: + if i % 2 == 1: options += " --database=test_{}".format(i) if i == 13: @@ -64,7 +68,8 @@ if __name__ == "__main__": parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server') parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=3600) - parser.add_argument("--num-parallel", default=cpu_count()); + parser.add_argument("--num-parallel", default=cpu_count()) + parser.add_argument('--hung-check', action='store_true', default=False) args = parser.parse_args() func_pipes = [] @@ -81,4 +86,13 @@ if __name__ == "__main__": logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes)) time.sleep(5) + logging.info("All processes finished") + if args.hung_check: + logging.info("Checking if some queries hung") + cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") + res = call(cmd, shell=True, stderr=STDOUT) + if res != 0: + logging.info("Hung check failed with exit code {}".format(res)) + sys.exit(1) + logging.info("Stress test finished") diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7047007d2fc..e70f9e05679 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,12 +1,23 @@ # docker build -t yandex/clickhouse-style-test . FROM ubuntu:20.04 -RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell +RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ + shellcheck \ + libxml2-utils \ + git \ + python3-pip \ + pylint \ + yamllint \ + && pip3 install codespell +# For |& syntax +SHELL ["bash", "-c"] + CMD cd /ClickHouse/utils/check-style && \ - ./check-style -n | tee /test_output/style_output.txt && \ - ./check-typos | tee /test_output/typos_output.txt && \ - ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \ - ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \ - ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt + ./check-style -n |& tee /test_output/style_output.txt && \ + ./check-typos |& tee /test_output/typos_output.txt && \ + ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt && \ + ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \ + ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt && \ + true diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md new file mode 100644 index 00000000000..edb6586ee7d --- /dev/null +++ b/docs/_description_templates/template-data-type.md @@ -0,0 +1,29 @@ +--- +toc_priority: +toc_title: +--- + +# data_type_name {#data_type-name} + +Description. + +**Parameters** (Optional) + +- `x` — Description. [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. [Type name](relative/path/to/type/dscr.md#type). 
+ +**Examples** + +```sql + +``` + +## Additional Info {#additional-info} (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) + +[Original article](https://clickhouse.tech/docs/en/data_types//) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index b69d7ed5309..a0074a76ef6 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -12,16 +12,20 @@ Alias: ``. (Optional) More text (Optional). -**Parameters** (Optional) +**Arguments** (Optional) - `x` — Description. [Type name](relative/path/to/type/dscr.md#type). - `y` — Description. [Type name](relative/path/to/type/dscr.md#type). +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. [Type name](relative/path/to/type/dscr.md#type). + **Returned value(s)** -- Returned values list. +- Returned values list. -Type: [Type](relative/path/to/type/dscr.md#type). +Type: [Type name](relative/path/to/type/dscr.md#type). **Example** diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md index 3fdf9788d79..f2decc4bb6d 100644 --- a/docs/_description_templates/template-system-table.md +++ b/docs/_description_templates/template-system-table.md @@ -8,10 +8,14 @@ Columns: **Example** +Query: + ``` sql SELECT * FROM system.table_name ``` +Result: + ``` text Some output. It shouldn't be too long. ``` diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 60365ad744a..e0b1be710f1 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -40,7 +40,7 @@ $ cd ClickHouse ``` bash $ mkdir build $ cd build -$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` +$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` $ ninja $ cd .. ``` diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 89fe9304c4c..2e361cc82f0 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine. - Replication can be easily broken. - Manual operations on database and tables are forbidden. +- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes. 
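A minimal, hedged illustration of the limitation above: `optimize_on_insert` is a session-level setting, so its effect on how `MaterializeMySQL` merges incoming data can be observed by toggling it before replicated rows arrive (the value shown is only for illustration, not a recommendation).

``` sql
-- Toggle merging of inserted blocks for the current session; with the setting
-- disabled, replicated rows are written as-is and merged later by background merges.
SET optimize_on_insert = 0;
```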
## Examples of Use {#examples-of-use} @@ -156,4 +157,4 @@ SELECT * FROM mysql.test; └───┴─────┴──────┘ ``` -[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) +[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 857e148277c..6e864751cc3 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/). -`EmbeddedRocksDB` lets you: - ## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table} ``` sql @@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: - `primary_key_name` – any column name in the column list. +- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`. +- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order. +- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`. Example: @@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB PRIMARY KEY key ``` -## Description {#description} - -- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key. -- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order. -- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb. +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index cf3e36c2f48..288c9c3cd56 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -12,6 +12,9 @@ List of supported integrations: - [ODBC](../../../engines/table-engines/integrations/odbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) - [S3](../../../engines/table-engines/integrations/s3.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md new file mode 100644 index 00000000000..e648a13b5e0 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -0,0 +1,57 @@ +--- +toc_priority: 7 +toc_title: MongoDB +--- + +# MongoDB {#mongodb} + +MongoDB engine is read-only table engine which allows to read data (`SELECT` queries) from remote MongoDB collection. Engine supports only non-nested data types. `INSERT` queries are not supported. + +## Creating a Table {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name +( + name1 [type1], + name2 [type2], + ... 
+) ENGINE = MongoDB(host:port, database, collection, user, password); +``` + +**Engine Parameters** + +- `host:port` — MongoDB server address. + +- `database` — Remote database name. + +- `collection` — Remote collection name. + +- `user` — MongoDB user. + +- `password` — User password. + +## Usage Example {#usage-example} + +Table in ClickHouse which allows to read data from MongoDB collection: + +``` text +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); +``` + +Query: + +``` sql +SELECT COUNT() FROM mongo_table; +``` + +``` text +┌─count()─┐ +│ 4 │ +└─────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index b0901ee6f6e..4a0550275ca 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,10 +59,26 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Required configuration: +Also format settings can be added along with rabbitmq-related settings. + +Example: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` The RabbitMQ server configuration should be added using the ClickHouse config file. +Required configuration: + ``` xml root @@ -70,16 +86,12 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi ``` -Example: +Additional configuration: -``` sql - CREATE TABLE queue ( - key UInt64, - value UInt64 - ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', - rabbitmq_exchange_name = 'exchange1', - rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; +``` xml + + clickhouse + ``` ## Description {#description} @@ -105,6 +117,7 @@ Exchange type options: - `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Setting `rabbitmq_queue_base` may be used for the following cases: + - to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same. - to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. - to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.) 
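A hedged sketch of the first `rabbitmq_queue_base` case above. The table names and the queue base value are invented for illustration; only the settings themselves come from the list in this section. Two tables declare the same `rabbitmq_queue_base`, `rabbitmq_num_consumers` and `rabbitmq_num_queues`, so their consumers attach to the same queues:

``` sql
-- Both tables read from the same set of queues, spreading consumption across them.
CREATE TABLE queue_reader_1 (key UInt64, value UInt64)
ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                           rabbitmq_exchange_name = 'exchange1',
                           rabbitmq_format = 'JSONEachRow',
                           rabbitmq_queue_base = 'shared_queue',
                           rabbitmq_num_consumers = 2,
                           rabbitmq_num_queues = 2;

CREATE TABLE queue_reader_2 (key UInt64, value UInt64)
ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                           rabbitmq_exchange_name = 'exchange1',
                           rabbitmq_format = 'JSONEachRow',
                           rabbitmq_queue_base = 'shared_queue',
                           rabbitmq_num_consumers = 2,
                           rabbitmq_num_queues = 2;
```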
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index d8cceb4d511..5858a0803e6 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint - `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. - `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. - `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. - -This configuration also applies to S3 disks in `MergeTree` table engine family. +- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Example: @@ -149,6 +148,7 @@ Example: + ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 084d05ec0a0..753859b46d2 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -45,7 +45,10 @@ ORDER BY expr [PARTITION BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] [SETTINGS name=value, ...] ``` @@ -80,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql Expression must have one `Date` or `DateTime` column as a result. Example: `TTL date + INTERVAL 1 DAY` - Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule. + Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule. For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) @@ -101,7 +104,8 @@ For a description of parameters, see the [CREATE query description](../../../sql - `max_parts_in_total` — Maximum number of parts in all partitions. - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. 
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. - + - `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify the setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global settings. + **Example of Sections Setting** ``` sql @@ -455,18 +459,28 @@ ALTER TABLE example_table Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria. ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ``` Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time): - `DELETE` - delete expired rows (default action); - `TO DISK 'aaa'` - move part to the disk `aaa`; -- `TO VOLUME 'bbb'` - move part to the disk `bbb`. +- `TO VOLUME 'bbb'` - move part to the volume `bbb`; +- `GROUP BY` - aggregate expired rows. -Examples: +With the `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves). -Creating a table with TTL +The `GROUP BY` expression must be a prefix of the table primary key. + +If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, the resulting row contains an arbitrary value from the grouped rows (as if the aggregate function `any` were applied to it). + +**Examples** + +Creating a table with TTL: ``` sql CREATE TABLE example_table @@ -482,13 +496,43 @@ TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Altering TTL of the table +Altering TTL of the table: ``` sql ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +Creating a table where the rows expire after one month. The expired rows whose dates fall on a Monday are deleted: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +Creating a table where expired rows are aggregated. In the resulting rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any value from the grouped rows. + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY k1, k2 +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **Removing Data** Data with an expired TTL is removed when ClickHouse merges data parts.
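A minimal sketch, assuming the `table_with_where` table from the example above: because removal only happens on merges, an off-schedule merge can be forced with `OPTIMIZE ... FINAL` when expired rows must be gone before the next read (at the cost of an extra merge).

``` sql
-- Force a merge so that rows with an expired TTL are dropped, then query.
OPTIMIZE TABLE table_with_where FINAL;
SELECT count() FROM table_with_where;
```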
@@ -671,6 +715,7 @@ Configuration markup: https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + your_base64_encoded_customer_key http://proxy1 http://proxy2 @@ -706,7 +751,8 @@ Optional parameters: - `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. - `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`. - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. -- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. S3 disk can be configured as `main` or `cold` storage: diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md index 5addc455602..fdf1f1f290e 100644 --- a/docs/en/faq/operations/delete-old-data.md +++ b/docs/en/faq/operations/delete-old-data.md @@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. -More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition). +More details on [table truncation](../../sql-reference/statements/truncate.md). diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 64363c963c5..fe697972dff 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first ``` sql CREATE TABLE tutorial.hits_replica (...) -ENGINE = ReplcatedMergeTree( +ENGINE = ReplicatedMergeTree( '/clickhouse_perftest/tables/{shard}/hits', '{replica}' ) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 11291d61300..33bf90a8b52 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -31,8 +31,8 @@ The supported formats are: | [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | -| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | | [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | | [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | @@ -612,7 +612,7 @@ Example: ``` ## JSONEachRow {#jsoneachrow} -## JSONStringEachRow {#jsonstringeachrow} +## JSONStringsEachRow {#jsonstringseachrow} ## JSONCompactEachRow {#jsoncompacteachrow} ## JSONCompactStringEachRow {#jsoncompactstringeachrow} @@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. 
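A hedged sketch of such an insert. The table `test_json` and its columns are made up for illustration; only the `FORMAT JSONEachRow` clause itself comes from this section.

``` sql
-- Hypothetical table used only to show row-per-JSON-value input and output.
CREATE TABLE test_json (x UInt32, s String) ENGINE = Memory;

-- Each row is provided as a separate JSON value.
INSERT INTO test_json FORMAT JSONEachRow {"x":1,"s":"hello"} {"x":2,"s":"world"}

SELECT * FROM test_json FORMAT JSONEachRow
```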
## JSONEachRowWithProgress {#jsoneachrowwithprogress} -## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress} +## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} -Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values. +Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json {"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 2684e6fdd3a..454d856f779 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -8,118 +8,120 @@ toc_title: Adopters !!! warning "Disclaimer" The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. -| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | -| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | -| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | -| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | -| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | -| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | -| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | -| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | -| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | -| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | 
+|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------| +| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | +| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | +| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | +| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | +| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | +| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | +| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | -| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | -| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | -| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | +| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | | CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | -| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | -| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | -| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | -| Citymobil | 
Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | -| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | -| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | -| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | -| eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | -| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | -| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | -| FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | -| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | -| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 
2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | -| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | -| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | -| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | -| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | -| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | -| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | -| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | -| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | -| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x -| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | -| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official 
Website](https://getnoc.com/features/big-data/) | -| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | -| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | -| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | -| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | -| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | -| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | -| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | -| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | -| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | -| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | -| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| SGK | Goverment Social Security | Analytics | — 
| — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | -| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | -| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | -| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | -| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | -| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | -| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | -| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | -| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | -| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | -| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | -| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | -| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | -| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | -| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | -| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 
servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | -| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | -| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | -| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | +| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | +| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | +| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | +| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | +| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | +| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | +| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | +| eBay 
| E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | +| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | +| FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | +| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | +| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | +| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | +| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | +| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | +| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | +| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| 
Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | +| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | +| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x +| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | +| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | +| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | +| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | +| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | +| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | +| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| PRANA | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) | +| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | +| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | +| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Retell | Speech synthesis 
| Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | +| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | +| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | +| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | +| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | +| SGK | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | +| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | +| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | +| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | +| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | +| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | +| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | +| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | +| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | +| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | +| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | +| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | +| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | +| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | +| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index ea37a22c165..f4206f5d70c 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Data Backup # Data Backup {#data-backup} -While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards don’t cover all possible cases and can be circumvented. 
+While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards don’t cover all possible cases and can be circumvented.

In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
new file mode 100644
index 00000000000..95f80f192f5
--- /dev/null
+++ b/docs/en/operations/external-authenticators/index.md
@@ -0,0 +1,13 @@
+---
+toc_folder_title: External User Authenticators and Directories
+toc_priority: 48
+toc_title: Introduction
+---
+
+# External User Authenticators and Directories {#external-authenticators}
+
+ClickHouse supports authenticating and managing users using external services.
+
+The following external authenticators and directories are supported:
+
+- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
new file mode 100644
index 00000000000..36a13227852
--- /dev/null
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -0,0 +1,156 @@
+# LDAP {#external-authenticators-ldap}
+
+An LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
+
+- use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths
+- use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server
+
+For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of the config are able to refer to it.
+
+## LDAP Server Definition {#ldap-server-definition}
+
+To define an LDAP server you must add an `ldap_servers` section to the `config.xml`. For example,
+
+```xml
+<yandex>
+    <ldap_servers>
+        <my_ldap_server>
+            <host>localhost</host>
+            <port>636</port>
+            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn>
+            <verification_cooldown>300</verification_cooldown>
+            <enable_tls>yes</enable_tls>
+            <tls_minimum_protocol_version>tls1.2</tls_minimum_protocol_version>
+            <tls_require_cert>demand</tls_require_cert>
+            <tls_cert_file>/path/to/tls_cert_file</tls_cert_file>
+            <tls_key_file>/path/to/tls_key_file</tls_key_file>
+            <tls_ca_cert_file>/path/to/tls_ca_cert_file</tls_ca_cert_file>
+            <tls_ca_cert_dir>/path/to/tls_ca_cert_dir</tls_ca_cert_dir>
+            <tls_cipher_suite>ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384</tls_cipher_suite>
+        </my_ldap_server>
+    </ldap_servers>
+</yandex>
+```
+
+Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
+
+Parameters:
+
+- `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
+- `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
+- `bind_dn` - template used to construct the DN to bind to.
+    - The resulting DN will be constructed by replacing all `{user_name}` substrings of the
+      template with the actual user name during each authentication attempt.
+- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt,
+    during which the user will be assumed to be successfully authenticated for all consecutive
+    requests without contacting the LDAP server.
+    - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
+- `enable_tls` - flag to trigger use of secure connection to the LDAP server.
+    - Specify `no` for plain text `ldap://` protocol (not recommended).
+    - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
+    - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
+- `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS.
+    - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
+- `tls_require_cert` - SSL/TLS peer certificate verification behavior.
+    - Accepted values are: `never`, `allow`, `try`, `demand` (the default).
+- `tls_cert_file` - path to certificate file.
+- `tls_key_file` - path to certificate key file.
+- `tls_ca_cert_file` - path to CA certificate file.
+- `tls_ca_cert_dir` - path to the directory containing CA certificates.
+- `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
+
+## LDAP External Authenticator {#ldap-external-authenticator}
+
+A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify a previously defined LDAP server name instead of `password` or similar sections in the user definition.
+
+At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
+
+For example,
+
+```xml
+<yandex>
+    <!-- ... -->
+    <users>
+        <!-- ... -->
+        <my_user>
+            <!-- ... -->
+            <ldap>
+                <server>my_ldap_server</server>
+            </ldap>
+        </my_user>
+    </users>
+</yandex>
+```
+
+Note that the user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
+
+When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
+
+```sql
+CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
+```
+
+## LDAP External User Directory {#ldap-external-user-directory}
+
+In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify a previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.
+
+At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section.
Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + +Example (goes into `config.xml`): + +```xml + + + + + + my_ldap_server + + + + + + ou=groups,dc=example,dc=com + subtree + (&(objectClass=groupOfNames)(member={bind_dn})) + cn + clickhouse_ + + + + +``` + +Note that `my_ldap_server` referred in the `ldap` section inside the `user_directories` section must be a previously +defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)). + +Parameters: + +- `server` - one of LDAP server names defined in the `ldap_servers` config section above. + This parameter is mandatory and cannot be empty. +- `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. + - If no roles are specified here or assigned during role mapping (below), user will not be able + to perform any actions after authentication. +- `role_mapping` - section with LDAP search parameters and mapping rules. + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` + and the name of the logged in user. For each entry found during that search, the value of the specified + attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, + and the rest of the value becomes the name of a local role defined in ClickHouse, + which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. + - `base_dn` - template used to construct the base DN for the LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` + substrings of the template with the actual user name and bind DN during each LDAP search. + - `scope` - scope of the LDAP search. + - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). + - `search_filter` - template used to construct the search filter for the LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` + substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + - Note, that the special characters must be escaped properly in XML. + - `attribute` - attribute name whose values will be returned by the LDAP search. + - `prefix` - prefix, that will be expected to be in front of each string in the original + list of strings returned by the LDAP search. Prefix will be removed from the original + strings and resulting strings will be treated as local role names. Empty, by default. + diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index c637ef03f71..56c3eaf6455 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -29,6 +29,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas. 
0 + 0 + 0 0 0 0 @@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo 3600 1000 + 100 + 100 100 1000000000 100000000000 @@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo 86400 10000 + 10000 + 10000 1000 5000000000 500000000000 @@ -74,6 +80,10 @@ Here are the amounts that can be restricted: `queries` – The total number of requests. +`query_selects` – The total number of select requests. + +`query_inserts` – The total number of insert requests. + `errors` – The number of queries that threw an exception. `result_rows` – The total number of rows given as a result. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a1ed34f10bb..89fcbafe663 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -296,11 +296,33 @@ Useful for breaking away from a specific network interface. example.yandex.ru ``` +## interserver_https_port {#interserver-https-port} + +Port for exchanging data between ClickHouse servers over `HTTPS`. + +**Example** + +``` xml +9010 +``` + +## interserver_https_host {#interserver-https-host} + +Similar to `interserver_http_host`, except that this hostname can be used by other servers to access this server over `HTTPS`. + +**Example** + +``` xml +example.yandex.ru +``` + ## interserver_http_credentials {#server-settings-interserver-http-credentials} The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. By default, the authentication is not used. +**Note:** These credentials are common for replication through `HTTP` and `HTTPS`. + This section contains the following parameters: - `user` — username. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index e0f7c79dcab..77b68715ba9 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -186,5 +186,16 @@ Possible values: Default value: auto (number of CPU cores). During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound). +## max_partitions_to_read {#max-partitions-to-read} + +Limits the maximum number of partitions that can be accessed in one query. + +The setting value specified when the table is created can be overridden via query-level setting. + +Possible values: + +- Any positive integer. + +Default value: -1 (unlimited). 
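+
+A minimal sketch of both forms (the table name `hits` and its schema are hypothetical, used only for illustration):
+
+``` sql
+CREATE TABLE hits (EventDate Date, UserID UInt64)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(EventDate)
+ORDER BY UserID
+SETTINGS max_partitions_to_read = 12;
+
+-- The table-level value can be overridden for a single query:
+SELECT count() FROM hits SETTINGS max_partitions_to_read = 2;
+```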
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 3e15d9e6dea..ee834dca98a 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c ### user_name/databases {#user-namedatabases} -In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security. +In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security. **Example** diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 85a3b8bd941..6440f09bb40 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format. Possible values: - 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs +- 1 — Enum values are parsed as enum IDs. Default value: 0. @@ -428,7 +428,7 @@ Possible values: - `'basic'` — Use basic parser. - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`. + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. Default value: `'basic'`. @@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and Possible values: -- `'simple'` - Simple output format. +- `simple` - Simple output format. - Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone. + Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. -- `'iso'` - ISO output format. +- `iso` - ISO output format. - Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC). + Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). -- `'unix_timestamp'` - Unix timestamp output format. +- `unix_timestamp` - Unix timestamp output format. - Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`. + Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. -Default value: `'simple'`. +Default value: `simple`. See also: @@ -1944,6 +1944,21 @@ Possible values: Default value: 16. +## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} + +Sets the number of threads performing background tasks for message streaming. This setting is applied at the ClickHouse server start and can’t be changed in a user session. + +Possible values: + +- Any positive integer. 
+ +Default value: 16. + +**See Also** + +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine + ## validate_polygons {#validate_polygons} Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. @@ -2577,4 +2592,90 @@ Possible values: Default value: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). + +Possible values: + +- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- 1 — The trace for all executed queries is enabled. + +Default value: `0`. + +## optimize_on_insert {#optimize-on-insert} + +Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +**Example** + +The difference between enabled and disabled: + +Query: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Result: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) + +## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists} + +Allows to select data from a file engine table without file. + +Possible values: +- 0 — `SELECT` throws exception. +- 1 — `SELECT` returns empty result. + +Default value: `0`. + +## engine_file_truncate_on_insert {#engine-file-truncate-on-insert} + +Enables or disables truncate before insert in file engine tables. + +Possible values: +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 643bdee6def..c252458af8a 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -1,22 +1,21 @@ # system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} -Contains information about distributed ddl queries (ON CLUSTER queries) that were executed on a cluster. +Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster. Columns: -- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id. 
-- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port. -- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Stats of the query. -- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name. -- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) - Nod that executed the query. -- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time. -- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds. -- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper. - +- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). **Example** @@ -62,6 +61,5 @@ exception_code: ZOK 2 rows in set. Elapsed: 0.025 sec. ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) \ No newline at end of file diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..e45a989742c --- /dev/null +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,53 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries. + +Columns: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). 
+ +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. + +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. + +**Example** + +Query: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +**See Also** + +- [OpenTelemetry](../../operations/opentelemetry.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 9aa95b1a493..579fdaefb0a 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: -- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. +- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NEW_PART` — Inserting of a new data part. - `MERGE_PARTS` — Merging of data parts. - `DOWNLOAD_PART` — Downloading a data part. - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MUTATE_PART` — Mutating of a data part. - `MOVE_PART` — Moving the data part from the one disk to another one. -- `event_date` (Date) — Event date. -- `event_time` (DateTime) — Event time. -- `duration_ms` (UInt64) — Duration. -- `database` (String) — Name of the database the data part is in. -- `table` (String) — Name of the table the data part is in. -- `part_name` (String) — Name of the data part. -- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`. -- `rows` (UInt64) — The number of rows in the data part. -- `size_in_bytes` (UInt64) — Size of the data part in bytes. -- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. -- `read_rows` (UInt64) — The number of rows was read during the merge. 
-- `read_bytes` (UInt64) — The number of bytes was read during the merge. -- `error` (UInt16) — The code number of the occurred error. -- `exception` (String) — Text message of the occurred error. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision. + +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows was read during the merge. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes was read during the merge. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error. +- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error. The `system.part_log` table is created after the first inserting data to the `MergeTree` table. +**Example** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +event_time_microseconds: 2021-02-02 11:14:28.861919 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 065296f5df3..c2dcb4db34d 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -9,6 +9,8 @@ Columns: - `0` — Interval is not randomized. - `1` — Interval is randomized. 
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries. +- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. - `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows. - `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of RAM volume in bytes used to store a queries result. diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0eb59fd6453..17af9ad9a30 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -9,6 +9,8 @@ Columns: - `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption. - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval. +- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval. - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests. - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index f4f52a4a131..3e797c9bdc6 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -7,16 +7,16 @@ Columns: - `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID. - `storage`([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query. - `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values: -- `[]` — All users share the same quota. 
-- `['user_name']` — Connections with the same user name share the same quota. -- `['ip_address']` — Connections from the same IP share the same quota. -- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. -- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. -- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. + - `[]` — All users share the same quota. + - `['user_name']` — Connections with the same user name share the same quota. + - `['ip_address']` — Connections from the same IP share the same quota. + - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. + - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. + - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds. - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values: -- `0` — The quota applies to users specify in the `apply_to_list`. -- `1` — The quota applies to all users except those listed in `apply_to_except`. + - `0` — The quota applies to users specify in the `apply_to_list`. + - `1` — The quota applies to all users except those listed in `apply_to_except`. - `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to. - `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to. diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index ed6be820b26..31aafd3e697 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -11,6 +11,10 @@ Columns: - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval. 
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
+- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
+- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
+- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
+- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 8107f60b808..2903e0d3bd7 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -12,7 +12,7 @@ Columns:

- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.

-- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microseconds precision.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.

- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.

diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index ddb4d305964..82ace5e81dc 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -1,12 +1,16 @@
# system.zookeeper {#system-zookeeper}

The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for.

The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
To output data for all root nodes, write path = ‘/’.
If the path specified in ‘path’ doesn’t exist, an exception will be thrown.

+The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` nodes.
+If any path in the specified ‘path’ collection does not exist, an exception will be thrown.
+This can be used to perform a batch of ZooKeeper path queries.
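+
+For example, the batched form can be issued as a single query (a minimal sketch using the `name` column described below):
+
+``` sql
+SELECT name FROM system.zookeeper WHERE path IN ('/', '/clickhouse');
+```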
+ Columns: - `name` (String) — The name of the node. diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index edacf1ff973..9fa9c44e130 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -1,9 +1,9 @@ --- toc_priority: 47 -toc_title: ClickHouse Update +toc_title: ClickHouse Upgrade --- -# ClickHouse Update {#clickhouse-update} +# ClickHouse Upgrade {#clickhouse-upgrade} If ClickHouse was installed from `deb` packages, execute the following commands on the server: @@ -16,3 +16,19 @@ $ sudo service clickhouse-server restart If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. + +The upgrade of older version of ClickHouse to specific version: + +As an example: + +`xx.yy.a.b` is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases) + +```bash +$ sudo apt-get update +$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b +$ sudo service clickhouse-server restart +``` + + + + diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 431968bc629..015c90e90c7 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re OrDefault(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../ OrNull(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho Resample(start, end, step)(, resampling_key) ``` -**Parameters** +**Arguments** - `start` — Starting value of the whole required interval for `resampling_key` values. - `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3b02e145ff4..035bc91b9ed 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -17,10 +17,13 @@ histogram(number_of_bins)(values) The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal. +**Arguments** + +`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. + **Parameters** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. 
**Returned values** @@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) !!! warning "Warning" Events that occur at the same second may lay in the sequence in an undefined order affecting the result. -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - 1, if the pattern is matched. @@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - Number of non-overlapping event chains that are matched. @@ -239,14 +246,17 @@ The function works according to the algorithm: windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) ``` -**Parameters** +**Arguments** -- `window` — Length of the sliding window in seconds. -- `mode` - It is an optional argument. - - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. - `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). - `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +**Parameters** + +- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. +- `mode` - It is an optional parameter. + - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. + **Returned value** The maximum number of consecutive triggered conditions from the chain within the sliding time window. @@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will retention(cond1, cond2, ..., cond32); ``` -**Parameters** +**Arguments** - `cond` — an expression that returns a `UInt8` result (1 or 0). 
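+
+A minimal usage sketch (the table `retention_test` with `date` and `uid` columns is hypothetical):
+
+``` sql
+SELECT
+    uid,
+    retention(date = '2020-01-01', date = '2020-01-02', date = '2020-01-03') AS r
+FROM retention_test
+WHERE date IN ('2020-01-01', '2020-01-02', '2020-01-03')
+GROUP BY uid;
+```
+
+Here `r[1]` shows whether the first condition held for the user, and `r[2]`, `r[3]` are 1 only if the corresponding condition and the first one both held.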
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 35e87d49e60..7639117042f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -4,13 +4,42 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))` +Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMax(arg, val) +``` + +or + +``` sql +argMax(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. + +**Returned value** + +- `arg` value that corresponds to maximum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ │ director │ ('director',5000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 72c9bce6817..7ddc38cd28a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -4,13 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))` +Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMin(arg, val) +``` + +or + +``` sql +argMin(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. 
+ +**Returned value** + +- `arg` value that corresponds to minimum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ │ worker │ ('worker',1000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index e2e6aace734..12dc4ac1e9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -12,7 +12,7 @@ Calculates the arithmetic mean. avgWeighted(x) ``` -**Parameter** +**Arguments** - `x` — Values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 7b9c0de2755..2df09e560b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted avgWeighted(x, weight) ``` -**Parameters** +**Arguments** - `x` — Values. - `weight` — Weights of the values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index e5d31429e12..0a5aef2fe97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`: - `count(expr)` or `COUNT(DISTINCT expr)`. - `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. -**Parameters** +**Arguments** The function can take: diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md new file mode 100644 index 00000000000..bb6f802ccaf --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -0,0 +1,19 @@ +--- +toc_priority: 141 +--- + +# deltaSum {#agg_functions-deltasum} + +Syntax: `deltaSum(value)` + +Adds the differences between consecutive rows. If the difference is negative, it is ignored. +`value` must be some integer or floating point type. 
+ +Example: + +```sql +select deltaSum(arrayJoin([1, 2, 3])); -- => 2 +select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7 +select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25 +``` + diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index f4b8665a0a4..68456bf7844 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function - If a query is executed in a single thread, the first one of the inserted values is used. - If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. -**Parameters** +**Arguments** - `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). - `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 1cd40c2002f..c732efecf58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index ef979cd5f6a..c3dfeda850e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 36fa6a9d661..df0b8120eef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l groupArraySample(max_size[, seed])(x) ``` -**Parameters** +**Arguments** - `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). - `seed` — Seed for the random number generator. Optional. 
[UInt64](../../data-types/int-uint.md). Default value: `123456`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 9be73fd54ec..1275ad7536c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers. groupBitAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 9367652db38..9317ef98783 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal groupBitmap(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 7c0c89040bb..f59bb541a42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a groupBitmapAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index 894c6c90aab..a4d99fd29e3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index 5d0ec0fb097..834f088d02f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 7383e620060..e427a9ad970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers. groupBitOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. 
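As a quick sanity check of the bitwise `OR` aggregation, a minimal sketch over a derived table (not taken from the original page):

```sql
SELECT groupBitOr(num) AS res
FROM (SELECT arrayJoin([4, 2, 1]) AS num);
-- 0b100 OR 0b010 OR 0b001 = 0b111 = 7
```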
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 01026012b91..4b8323f92db 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers. groupBitXor(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md index ea44d5f1ddd..313d6bf81f5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -13,7 +13,7 @@ Use it for tests or to process columns of types `AggregateFunction` and `Aggrega initializeAggregation (aggregate_function, column_1, column_2); ``` -**Parameters** +**Arguments** - `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string). - `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index 65e7e31b9b4..db402c99663 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -10,7 +10,7 @@ Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. kurtPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 224bbbdb9e7..4bb9f76763b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the kurtosis of a random variable if passe kurtSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..dc5fc45b878 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,74 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Applies the Mann-Whitney rank test to samples from two populations. + +**Syntax** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that two populations are stochastically equal. Also one-sided hypothesises can be tested. This test does not assume that data have normal distribution. 
+ +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Result: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) +- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 77f858a1735..d625ef4cfd9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -18,7 +18,7 @@ quantile(level)(expr) Alias: `median`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 6046447dd10..a20ac26f599 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -18,7 +18,7 @@ quantileDeterministic(level)(expr, determinator) Alias: `medianDeterministic`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index a39f724f368..06ef7ccfbd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -18,7 +18,7 @@ quantileExact(level)(expr) Alias: `medianExact`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -77,7 +77,7 @@ quantileExact(level)(expr) Alias: `medianExactLow`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -128,7 +128,7 @@ quantileExactHigh(level)(expr) Alias: `medianExactHigh`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 3251f8298a6..210f44e7587 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -18,7 +18,7 @@ quantileExactWeighted(level)(expr, weight) Alias: `medianExactWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
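To see how the `weight` argument from the signature above affects the result, a small illustrative sketch (the sample data is made up):

```sql
-- Value 1 occurs with weight 9 and value 10 with weight 1, so the weighted median is 1.
SELECT quantileExactWeighted(0.5)(value, weight) AS median
FROM (SELECT [1, 10] AS vs, [9, 1] AS ws)
ARRAY JOIN vs AS value, ws AS weight;
```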
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index bda98ea338d..dcc665a68af 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 309cbe95e95..56ef598f7e7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index 867e8b87e74..58ce6495a96 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -18,7 +18,7 @@ quantileTiming(level)(expr) Alias: `medianTiming`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 0f8606986c8..fb3b9dbf4d2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -18,7 +18,7 @@ quantileTimingWeighted(level)(expr, weight) Alias: `medianTimingWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
@@ -79,6 +79,40 @@ Result: └───────────────────────────────────────────────┘ ``` +# quantilesTimingWeighted {#quantilestimingweighted} + +Same as `quantileTimingWeighted`, but accept multiple parameters with quantile levels and return an Array filled with many values of that quantiles. + + +**Example** + +Input table: + +``` text +┌─response_time─┬─weight─┐ +│ 68 │ 1 │ +│ 104 │ 2 │ +│ 112 │ 3 │ +│ 126 │ 2 │ +│ 138 │ 1 │ +│ 162 │ 1 │ +└───────────────┴────────┘ +``` + +Query: + +``` sql +SELECT quantilesTimingWeighted(0,5, 0.99)(response_time, weight) FROM t +``` + +Result: + +``` text +┌─quantilesTimingWeighted(0.5, 0.99)(response_time, weight)─┐ +│ [112,162] │ +└───────────────────────────────────────────────────────────┘ +``` + **See Also** - [median](../../../sql-reference/aggregate-functions/reference/median.md#median) diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index dc23029f239..55ee1b8289b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -8,7 +8,7 @@ Computes a rank correlation coefficient. rankCorr(x, y) ``` -**Parameters** +**Arguments** - `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). - `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index d15a5ffdd47..b9dfc390f9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -10,7 +10,7 @@ Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. skewPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index cb323f4b142..f7a6df8f507 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the skewness of a random variable if passe skewSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..a1d7ae33fe1 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Applies Student's t-test to samples from two populations. + +**Syntax** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that means of populations are equal. 
Normal distribution with equal variances is assumed. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Result: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) +- [welchTTest function](welchttest.md#welchttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 004a67d33af..b3e79803ba1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -16,7 +16,7 @@ This function doesn’t provide a guaranteed result. In certain situations, erro We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. -**Parameters** +**Arguments** - ‘N’ is the number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index b597317f44e..02b9f77ea6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -12,7 +12,7 @@ Similar to `topK` but takes one additional argument of integer type - `weight`. topKWeighted(N)(x, weight) ``` -**Parameters** +**Arguments** - `N` — The number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 81d1ec6761e..7ba2cdc6cb8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -10,7 +10,7 @@ Calculates the approximate number of different values of the argument. uniq(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index c52486bc38f..4434686ae61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -12,7 +12,7 @@ uniqCombined(HLL_precision)(x[, ...]) The `uniqCombined` function is a good choice for calculating the number of different values. 
-**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 9a6224533c8..eee675016ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -14,7 +14,7 @@ Use the `uniqExact` function if you absolutely need an exact result. Otherwise u The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases. -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index fcddc22cc46..5b23ea81eae 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -10,7 +10,7 @@ Calculates the approximate number of different argument values, using the [Hyper uniqHLL12(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..b391fb1d979 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Applies Welch's t-test to samples from two populations. + +**Syntax** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). 
+ + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Result: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) +- [studentTTest function](studentttest.md#studentttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 48957498d63..41e35aaa96f 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -45,6 +45,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Working with Data Types {#working-with-data-types} +The maximum size of an array is limited to one million elements. + When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md new file mode 100644 index 00000000000..58634e5b669 --- /dev/null +++ b/docs/en/sql-reference/data-types/map.md @@ -0,0 +1,83 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +`Map(key, value)` data type stores `key:value` pairs. + +**Parameters** +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +!!! warning "Warning" + Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`. + +To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. + +**Examples** + +Consider the table: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Select all `key2` values: + +```sql +SELECT a['key2'] FROM table_map; +``` +Result: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays. 
+ +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Result: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +## Convert Tuple to Map Type {#map-and-tuple} + +You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function: + +``` sql +SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; +``` + +``` text +┌─map───────────────────────────┐ +│ {1:'Ready',2:'Steady',3:'Go'} │ +└───────────────────────────────┘ +``` + +**See Also** + +- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function +- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + +[Original article](https://clickhouse.tech/docs/en/data-types/map/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 05c418b1f15..efef91b4b09 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that Details of the algorithm: - If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary. -- If there are overlapping ranges, you can use any. -- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides. +- If there are overlapping ranges, it returns value for any (random) range. +- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides. Configuration example: diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index dc7727bdfd8..c9c418d57a4 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -61,7 +61,7 @@ Combines arrays passed as arguments. arrayConcat(arrays) ``` -**Parameters** +**Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. **Example** @@ -111,7 +111,7 @@ Checks whether one array is a subset of another. hasAll(set, subset) ``` -**Parameters** +**Arguments** - `set` – Array of any type with a set of elements. - `subset` – Array of any type with elements that should be tested to be a subset of `set`. @@ -149,7 +149,7 @@ Checks whether two arrays have intersection by some elements. hasAny(array1, array2) ``` -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -191,7 +191,7 @@ For Example: - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`. - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`. -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -369,7 +369,7 @@ Removes the last item from the array. 
arrayPopBack(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -393,7 +393,7 @@ Removes the first item from the array. arrayPopFront(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -417,7 +417,7 @@ Adds one item to the end of the array. arrayPushBack(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -442,7 +442,7 @@ Adds one element to the beginning of the array. arrayPushFront(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -467,7 +467,7 @@ Changes the length of the array. arrayResize(array, size[, extender]) ``` -**Parameters:** +**Arguments:** - `array` — Array. - `size` — Required length of the array. @@ -509,7 +509,7 @@ Returns a slice of the array. arraySlice(array, offset[, length]) ``` -**Parameters** +**Arguments** - `array` – Array of data. - `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1. @@ -751,7 +751,7 @@ Calculates the difference between adjacent array elements. Returns an array wher arrayDifference(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -803,7 +803,7 @@ Takes an array, returns an array containing the distinct elements only. arrayDistinct(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -871,7 +871,7 @@ Applies an aggregate function to array elements and returns its result. The name arrayReduce(agg_func, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. @@ -936,7 +936,7 @@ Applies an aggregate function to array elements in given ranges and returns an a arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. @@ -1007,7 +1007,7 @@ flatten(array_of_arrays) Alias: `flatten`. 
-**Parameters** +**Arguments** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. @@ -1033,7 +1033,7 @@ Removes consecutive duplicate elements from an array. The order of result values arrayCompact(arr) ``` -**Parameters** +**Arguments** `arr` — The [array](../../sql-reference/data-types/array.md) to inspect. @@ -1069,7 +1069,7 @@ Combines multiple arrays into a single array. The resulting array contains the c arrayZip(arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `arrN` — [Array](../../sql-reference/data-types/array.md). @@ -1107,7 +1107,7 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see arrayAUC(arr_scores, arr_labels) ``` -**Parameters** +**Arguments** - `arr_scores` — scores prediction model gives. - `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample. @@ -1288,73 +1288,226 @@ Returns the index of the first element in the `arr1` array for which `func` retu Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayMin(\[func,\] arr1, …) {#array-min} +## arrayMin {#array-min} -Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements. +Returns the minimum of elements in the source array. + +If the `func` function is specified, returns the mininum of elements converted by this function. Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMin([1, 2, 4]) AS res +arrayMin([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The minimum of function values (or the array minimum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. + +**Examples** + +Query: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 1 │ └─────┘ +``` +Query: -SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -4 │ └─────┘ ``` -## arrayMax(\[func,\] arr1, …) {#array-max} +## arrayMax {#array-max} -Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements. +Returns the maximum of elements in the source array. + +If the `func` function is specified, returns the maximum of elements converted by this function. Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMax([1, 2, 4]) AS res +arrayMax([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The maximum of function values (or the array maximum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. 
+ +**Examples** + +Query: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 4 │ └─────┘ +``` +Query: -SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -1 │ └─────┘ ``` -## arraySum(\[func,\] arr1, …) {#array-sum} +## arraySum {#array-sum} -Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements. +Returns the sum of elements in the source array. + +If the `func` function is specified, returns the sum of elements converted by this function. Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arraySum([2,3]) AS res +arraySum([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The sum of the function values (or the array sum). + +Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 5 │ └─────┘ +``` +Query: -SELECT arraySum(x -> x*x, [2, 3]) AS res +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 13 │ └─────┘ ``` +## arrayAvg {#array-avg} -## arrayAvg(\[func,\] arr1, …) {#array-avg} +Returns the average of elements in the source array. -Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements. +If the `func` function is specified, returns the average of elements converted by this function. Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +**Syntax** + +```sql +arrayAvg([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The average of function values (or the array average). + +Type: [Float64](../../sql-reference/data-types/float.md). + +**Examples** + +Query: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Result: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Query: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Result: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` + ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing. 
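A short sketch of both forms (plain and with a lambda), analogous to the `arraySum` examples above:

```sql
SELECT arrayCumSum([1, 2, 3, 4]) AS res;              -- [1, 3, 6, 10]
SELECT arrayCumSum(x -> x * x, [1, 2, 3, 4]) AS res;  -- [1, 5, 14, 30]
```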
diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 57c2ae42ada..a3d0c82d8ab 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -35,7 +35,7 @@ Takes any integer and converts it into [binary form](https://en.wikipedia.org/wi SELECT bitTest(number, index) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index` – position of bit. @@ -100,7 +100,7 @@ The conjuction for bitwise operations: SELECT bitTestAll(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). @@ -165,7 +165,7 @@ The disjunction for bitwise operations: SELECT bitTestAny(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. @@ -220,7 +220,7 @@ Calculates the number of bits set to one in the binary representation of a numbe bitCount(x) ``` -**Parameters** +**Arguments** - `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a66098beffb..bfff70576f2 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -21,7 +21,7 @@ Build a bitmap from unsigned integer array. bitmapBuild(array) ``` -**Parameters** +**Arguments** - `array` – unsigned integer array. @@ -45,7 +45,7 @@ Convert bitmap to integer array. bitmapToArray(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -69,7 +69,7 @@ Return subset in specified range (not include the range_end). bitmapSubsetInRange(bitmap, range_start, range_end) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -97,7 +97,7 @@ Creates a subset of bitmap with n elements taken between `range_start` and `card bitmapSubsetLimit(bitmap, range_start, cardinality_limit) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -133,7 +133,7 @@ Checks whether the bitmap contains an element. bitmapContains(haystack, needle) ``` -**Parameters** +**Arguments** - `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. - `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -167,7 +167,7 @@ bitmapHasAny(bitmap1, bitmap2) If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. -**Parameters** +**Arguments** - `bitmap*` – bitmap object. @@ -197,7 +197,7 @@ If the second argument is an empty bitmap then returns 1. 
bitmapHasAll(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -221,7 +221,7 @@ Retrun bitmap cardinality of type UInt64. bitmapCardinality(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -243,7 +243,7 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em bitmapMin(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -263,7 +263,7 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. bitmapMax(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -283,7 +283,7 @@ Transform an array of values in a bitmap to another array of values, the result bitmapTransform(bitmap, from_array, to_array) -**Parameters** +**Arguments** - `bitmap` – bitmap object. - `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array. @@ -307,7 +307,7 @@ Two bitmap and calculation, the result is a new bitmap. bitmapAnd(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -331,7 +331,7 @@ Two bitmap or calculation, the result is a new bitmap. bitmapOr(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -355,7 +355,7 @@ Two bitmap xor calculation, the result is a new bitmap. bitmapXor(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -379,7 +379,7 @@ Two bitmap andnot calculation, the result is a new bitmap. bitmapAndnot(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -403,7 +403,7 @@ Two bitmap and calculation, return cardinality of type UInt64. bitmapAndCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -427,7 +427,7 @@ Two bitmap or calculation, return cardinality of type UInt64. bitmapOrCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -451,7 +451,7 @@ Two bitmap xor calculation, return cardinality of type UInt64. bitmapXorCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -475,7 +475,7 @@ Two bitmap andnot calculation, return cardinality of type UInt64. bitmapAndnotCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index 446a4729ff2..2d57cbb3bd5 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -17,7 +17,7 @@ SELECT if(cond, then, else) If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. -**Parameters** +**Arguments** - `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. - `then` - The expression to return if condition is met. 
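For instance, a minimal sketch using the `numbers` table function (purely illustrative):

```sql
SELECT number, if(number % 2 = 0, 'even', 'odd') AS parity
FROM numbers(3);
-- 0 -> 'even', 1 -> 'odd', 2 -> 'even'
```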
@@ -117,7 +117,7 @@ Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_c Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)` -**Parameters:** +**Arguments:** - `cond_N` — The condition for the function to return `then_N`. - `then_N` — The result of the function when executed. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 9de780fb596..f26e1bee6c9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -186,7 +186,7 @@ Truncates sub-seconds. toStartOfSecond(value[, timezone]) ``` -**Parameters** +**Arguments** - `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). @@ -328,7 +328,7 @@ For mode values with a meaning of “contains January 1”, the week contains Ja toWeek(date, [, mode][, Timezone]) ``` -**Parameters** +**Arguments** - `date` – Date or DateTime. - `mode` – Optional parameter, Range of values is \[0,9\], default is 0. @@ -378,9 +378,9 @@ date_trunc(unit, value[, timezone]) Alias: `dateTrunc`. -**Parameters** +**Arguments** -- `unit` — Part of date. [String](../syntax.md#syntax-string-literal). +- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: - `second` @@ -435,6 +435,201 @@ Result: - [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) +## date\_add {#date_add} + +Adds specified date/time interval to the provided date. + +**Syntax** + +``` sql +date_add(unit, value, date) +``` + +Aliases: `dateAdd`, `DATE_ADD`. + +**Arguments** + +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` added to `date`. + +**Example** + +```sql +select date_add(YEAR, 3, toDate('2018-01-01')); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2021-01-01 │ +└───────────────────────────────────────────────┘ +``` + +## date\_diff {#date_diff} + +Returns the difference between two Date or DateTime values. + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for result [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. 
If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: `int`. + +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## date\_sub {#date_sub} + +Subtracts a time/date interval from the provided date. + +**Syntax** + +``` sql +date_sub(unit, value, date) +``` + +Aliases: `dateSub`, `DATE_SUB`. + +**Arguments** + +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md) to subtract value from. + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` subtracted from `date`. + +**Example** + +Query: + +``` sql +SELECT date_sub(YEAR, 3, toDate('2018-01-01')); +``` + +Result: + +``` text +┌─minus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2015-01-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_add {#timestamp_add} + +Adds the specified time value with the provided date or date time value. + +**Syntax** + +``` sql +timestamp_add(date, INTERVAL value unit) +``` + +Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. + +**Arguments** + +- `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +**Returned value** + +Returns Date or DateTime with the specified `value` expressed in `unit` added to `date`. + +**Example** + +```sql +select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalMonth(3))─┐ +│ 2018-04-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_sub {#timestamp_sub} + +Returns the difference between two dates in the specified unit. + +**Syntax** + +``` sql +timestamp_sub(unit, value, date) +``` + +Aliases: `timeStampSub`, `TIMESTAMP_SUB`. + +**Arguments** + +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md). +- `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +**Returned value** + +Difference between `date` and the specified `value` expressed in `unit`. 
+ +**Example** + +```sql +select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03')); +``` + +```text +┌─minus(toDateTime('2018-12-18 01:02:03'), toIntervalMonth(5))─┐ +│ 2018-07-18 01:02:03 │ +└──────────────────────────────────────────────────────────────┘ +``` + ## now {#now} Returns the current date and time. @@ -445,7 +640,7 @@ Returns the current date and time. now([timezone]) ``` -**Parameters** +**Arguments** - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). @@ -550,50 +745,6 @@ SELECT └──────────────────────────┴───────────────────────────────┘ ``` -## dateDiff {#datediff} - -Returns the difference between two Date or DateTime values. - -**Syntax** - -``` sql -dateDiff('unit', startdate, enddate, [timezone]) -``` - -**Parameters** - -- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). - - Supported values: second, minute, hour, day, week, month, quarter, year. - -- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. - -**Returned value** - -Difference between `startdate` and `enddate` expressed in `unit`. - -Type: `int`. - -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size} For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter: a constant UInt32, set to 1800 by default. @@ -704,7 +855,7 @@ Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Prolepti toModifiedJulianDay(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -740,7 +891,7 @@ Similar to [toModifiedJulianDay()](#tomodifiedjulianday), but instead of raising toModifiedJulianDayOrNull(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -776,7 +927,7 @@ Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Varian fromModifiedJulianDay(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). 
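For reference alongside the `fromModifiedJulianDay` hunk above, a minimal usage sketch of the conversion being documented (Modified Julian Day 58849 corresponds to 2020-01-01; the query is illustrative and not part of this patch):

``` sql
SELECT fromModifiedJulianDay(58849);
```

``` text
┌─fromModifiedJulianDay(58849)─┐
│ 2020-01-01                   │
└──────────────────────────────┘
```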
@@ -812,7 +963,7 @@ Similar to [fromModifiedJulianDayOrNull()](#frommodifiedjuliandayornull), but in fromModifiedJulianDayOrNull(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc3f5ca4345..31e84c08b39 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -15,7 +15,7 @@ Returns the string with the length as the number of passed arguments and each by char(number_1, [number_2, ..., number_n]); ``` -**Parameters** +**Arguments** - `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). @@ -107,7 +107,7 @@ For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted. -**Parameters** +**Arguments** - `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index bef2f8137d0..0dd7469b25e 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128 Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored). -Note that these functions work slowly. +Note that these functions work slowly until ClickHouse 21.1. ## encrypt {#encrypt} @@ -31,7 +31,7 @@ This function encrypts data using these modes: encrypt('mode', 'plaintext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string). @@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Returned value** -- Ciphered String. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). 
**Examples** @@ -52,57 +52,38 @@ Query: ``` sql CREATE TABLE encryption_test ( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; + `comment` String, + `secret` String +) +ENGINE = Memory ``` -Insert this data: +Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes: Query: ``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\ +('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\ +('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\ +('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212')); ``` -Example without `iv`: - Query: ``` sql -SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ -│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ -│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ -│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ -``` - -Example with `iv`: - -Query: - -``` sql -SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; -``` - -Result: - -``` text -┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ -│ aes-256-ctr │ │ -│ aes-256-ctr │ 7FB039F7 │ -│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ -└─────────────┴───────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` Example with `-gcm`: @@ -110,40 +91,26 @@ Example with `-gcm`: Query: ``` sql -SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \ +('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad')); + +SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ -│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ -│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ -│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ 
-```
-
-Example with `-gcm` mode and with `aad`:
-
-Query:
-
-``` sql
-SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
-```
-
-Result:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
-│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB                                        │
-│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447                                │
-│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA  │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
+┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
+│ aes-256-gcm          │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
+│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
+└──────────────────────┴──────────────────────────────────────────────┘
 ```
 
 ## aes_encrypt_mysql {#aes_encrypt_mysql}
 
-Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+Compatible with MySQL encryption; the resulting ciphertext can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+
+Produces the same ciphertext as `encrypt` for equal inputs. However, when `key` or `iv` is longer than it should normally be, `aes_encrypt_mysql` sticks to what MySQL's `aes_encrypt` does: it 'folds' the `key` and ignores the excess bits of the `iv`.
 
 Supported encryption modes:
 
@@ -156,86 +123,106 @@ Supported encryption modes:
 
 **Syntax**
 
-```sql
+``` sql
 aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
 ```
 
-**Parameters**
+**Arguments**
 
 - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
 - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. If the key is longer than required by the mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. Only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
 
 **Returned value**
 
-- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
+ **Examples** -Create this table: +Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext: Query: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; +SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal; ``` -Insert this data: +Result: -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` +┌─ciphertexts_equal─┐ +│ 1 │ +└───────────────────┘ ``` -Example without `iv`: + +But `encrypt` fails when `key` or `iv` is longer than expected: Query: ``` sql -SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'); ``` Result: ``` text -┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ -│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ -│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ -│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ +Received exception from server (version 21.1.2): +Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
```
 
-Example with `iv`:
+While `aes_encrypt_mysql` produces MySQL-compatible output:
 
 Query:
 
 ``` sql
-SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
+```
+
+Result:
+
+```text
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+Notice how supplying an even longer `IV` produces the same result:
+
+Query:
+
+``` sql
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
 ```
 
 Result:
 
 ``` text
-┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
-│ aes-256-cfb128 │                                                             │
-│ aes-256-cfb128 │ 7FB039F7                                                    │
-│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F                      │
-└────────────────┴─────────────────────────────────────────────────────────────┘
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+The result is binary-equal to what MySQL produces for the same inputs:
+
+``` sql
+mysql> SET block_encryption_mode='aes-256-cfb128';
+Query OK, 0 rows affected (0.00 sec)
+
+mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
++------------------------+
+| ciphertext             |
++------------------------+
+| 0x24E9E4966469         |
++------------------------+
+1 row in set (0.00 sec)
 ```
 
 ## decrypt {#decrypt}
 
-This function decrypts data using these modes:
+This function decrypts ciphertext into plaintext using these modes:
 
 - aes-128-ecb, aes-192-ecb, aes-256-ecb
 - aes-128-cbc, aes-192-cbc, aes-256-cbc
@@ -247,11 +234,11 @@ This function decrypts data using these modes:
 
 **Syntax**
 
-```sql
+``` sql
 decrypt('mode', 'ciphertext', 'key' [, iv, aad])
 ```
 
-**Parameters**
+**Arguments**
 
 - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
 - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
│ -└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` text +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` +Now let's try to decrypt all that data. + +Query: + +``` sql +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test +``` + +Result: +``` text +┌─comment─────────────────────────────┬─plaintext─┐ +│ aes-256-cfb128 no IV │ Secret │ +│ aes-256-cfb128 no IV, different key │ �4� + � │ +│ aes-256-cfb128 with IV │ ���6�~ │ + │aes-256-cbc no IV │ �2*4�h3c�4w��@ +└─────────────────────────────────────┴───────────┘ +``` + +Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. + ## aes_decrypt_mysql {#aes_decrypt_mysql} Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. +Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`. + Supported decryption modes: - aes-128-ecb, aes-192-ecb, aes-256-ecb @@ -321,11 +313,11 @@ Supported decryption modes: **Syntax** -```sql +``` sql aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). 
@@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Examples** -Create this table: - -Query: - +Let's decrypt data we've previously encrypted with MySQL: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) -Insert this data: - -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` Query: - ``` sql -SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext ``` Result: - ``` text -┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ -│ aes-128-cbc │ │ -│ aes-128-cbc │ text │ -│ aes-128-cbc │ What Is ClickHouse? │ -└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +┌─plaintext─┐ +│ Secret │ +└───────────┘ ``` [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7df6ef54f2a..834fcdf8282 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -19,7 +19,7 @@ dictGet('dict_name', 'attr_name', id_expr) dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). @@ -108,7 +108,7 @@ Checks whether a key is present in a dictionary. dictHas('dict_name', id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. @@ -130,7 +130,7 @@ Creates an array, containing all the parents of a key in the [hierarchical dicti dictGetHierarchy('dict_name', key) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. 
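As a sketch of how the `dictGetHierarchy` call documented above is typically used — `regions_hierarchical` is a hypothetical dictionary name, and the returned chain depends entirely on the dictionary contents:

``` sql
-- Hypothetical hierarchical dictionary; the result is an array of keys
-- walking from the given key up through its parents.
SELECT dictGetHierarchy('regions_hierarchical', toUInt64(5)) AS chain;
```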
@@ -149,7 +149,7 @@ Checks the ancestor of a key through the whole hierarchical chain in the diction dictIsIn('dict_name', child_id_expr, ancestor_id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. @@ -185,7 +185,7 @@ dictGet[Type]('dict_name', 'attr_name', id_expr) dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index c32af7194fb..df75e96c8fb 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -13,7 +13,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNull(x) ``` -**Parameters** +**Arguments** - `x` — A value with a non-compound data type. @@ -53,7 +53,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — A value with a non-compound data type. @@ -93,7 +93,7 @@ Checks from left to right whether `NULL` arguments were passed and returns the f coalesce(x,...) ``` -**Parameters:** +**Arguments:** - Any number of parameters of a non-compound type. All parameters must be compatible by data type. @@ -136,7 +136,7 @@ Returns an alternative value if the main argument is `NULL`. ifNull(x,alt) ``` -**Parameters:** +**Arguments:** - `x` — The value to check for `NULL`. - `alt` — The value that the function returns if `x` is `NULL`. @@ -176,7 +176,7 @@ Returns `NULL` if the arguments are equal. nullIf(x, y) ``` -**Parameters:** +**Arguments:** `x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception. @@ -215,7 +215,7 @@ Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md assumeNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — The original value. @@ -277,7 +277,7 @@ Converts the argument type to `Nullable`. toNullable(x) ``` -**Parameters:** +**Arguments:** - `x` — The value of any non-compound type. diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 6f288a7687d..c27eab0b421 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -72,7 +72,7 @@ Returns an array of [geohash](#geohash)-encoded strings of given precision that geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision) ``` -**Parameters** +**Arguments** - `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). - `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). 
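To make the `geohashesInBox` signature above concrete, a usage sketch (the coordinates are arbitrary; the exact set of geohash strings depends on the box and precision):

``` sql
-- Returns an Array(String) of precision-4 geohash cells covering the box.
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS cells;
```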
diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 4ed651e4e9e..9dda947b3a7 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -162,7 +162,7 @@ Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. geoToH3(lon, lat, resolution) ``` -**Parameters** +**Arguments** - `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). - `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -201,7 +201,7 @@ Result: h3kRing(h3index, k) ``` -**Parameters** +**Arguments** - `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `k` — Raduis. Type: [integer](../../../sql-reference/data-types/int-uint.md) @@ -315,7 +315,7 @@ Returns whether or not the provided [H3](#h3index) indexes are neighbors. h3IndexesAreNeighbors(index1, index2) ``` -**Parameters** +**Arguments** - `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -353,7 +353,7 @@ Returns an array of child indexes for the given [H3](#h3index) index. h3ToChildren(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,7 +390,7 @@ Returns the parent (coarser) index containing the given [H3](#h3index) index. h3ToParent(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9394426b20b..6bf1bebabaa 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem ## halfMD5 {#hash-functions-halfmd5} -[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ``` sql halfMD5(par1, ...) @@ -18,9 +18,9 @@ halfMD5(par1, ...) The function is relatively slow (5 million short strings per second per processor core). Consider using the [sipHash64](#hash_functions-siphash64) function instead. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). 
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -54,16 +54,16 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. -Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: +Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: 1. After hashing all the input parameters, the function gets the array of hashes. 2. Function takes the first and the second elements and calculates a hash for the array of them. 3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them. 4. The previous step is repeated for all the remaining elements of the initial hash array. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -97,9 +97,9 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -166,9 +166,9 @@ farmHash64(par1, ...) These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -226,7 +226,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 javaHashUTF16LE(stringUtf16le) ``` -**Parameters** +**Arguments** - `stringUtf16le` — a string in UTF-16LE encoding. @@ -292,9 +292,9 @@ Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/ metroHash64(par1, ...) ``` -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. 
Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -327,9 +327,9 @@ murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -358,7 +358,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val gccMurmurHash(par1, ...); ``` -**Parameters** +**Arguments** - `par1, ...` — A variable number of parameters that can be any of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -395,9 +395,9 @@ murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -424,7 +424,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu murmurHash3_128( expr ) ``` -**Parameters** +**Arguments** - `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index bfa1998d68a..964265a461b 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -32,7 +32,7 @@ If you use official ClickHouse packages, you need to install the `clickhouse-com addressToLine(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -123,7 +123,7 @@ Converts virtual memory address inside ClickHouse server process to the symbol f addressToSymbol(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -220,7 +220,7 @@ Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) demangle(symbol) ``` -**Parameters** +**Arguments** - `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. @@ -345,7 +345,7 @@ Emits trace log message to server log for each [Block](https://clickhouse.tech/d logTrace('message') ``` -**Parameters** +**Arguments** - `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index faf551601ac..eaea5e250fb 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -115,9 +115,20 @@ LIMIT 10 ## IPv6StringToNum(s) {#ipv6stringtonums} -The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. +The reverse function of IPv6NumToString. 
If the IPv6 address has an invalid format, it returns a string of null bytes. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. HEX can be uppercase or lowercase. +``` sql +SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); +``` + +``` text +┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────────────────────────────┘ +``` + ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -214,6 +225,7 @@ SELECT ## toIPv6(string) {#toipv6string} An alias to `IPv6StringToNum()` that takes a string form of IPv6 address and returns value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary equal to value returned by `IPv6StringToNum()`. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. ``` sql WITH @@ -243,33 +255,91 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` - -## isIPv4String - -Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise. - ``` sql -SELECT isIPv4String('127.0.0.1') +SELECT toIPv6('127.0.0.1') ``` ``` text -┌─isIPv4String('127.0.0.1')─┐ -│ 1 │ -└───────────────────────────┘ +┌─toIPv6('127.0.0.1')─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────┘ ``` -## isIPv6String +## isIPv4String {#isipv4string} -Determines if the input string is an IPv6 address or not. Returns `1` if true `0` otherwise. +Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`. -``` sql -SELECT isIPv6String('2001:438:ffff::407d:1bc1') +**Syntax** + +```sql +isIPv4String(string) ``` +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv4 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + ``` text -┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐ -│ 1 │ -└──────────────────────────────────────────┘ +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Determines whether the input string is an IPv6 address or not. If `string` is IPv4 address returns `0`. + +**Syntax** + +```sql +isIPv6String(string) +``` + +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv6 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). 
+ +**Examples** + +Query: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ ``` [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 05e755eaddc..edee048eb77 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -236,7 +236,7 @@ Extracts raw data from a JSON object. JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) ``` -**Parameters** +**Arguments** - `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. - `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index 8627fc26bad..f103a4ea421 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -27,7 +27,7 @@ Compares test groups (variants) and calculates for each group the probability to bayesAB(distribution_name, higher_is_better, variant_names, x, y) ``` -**Parameters** +**Arguments** - `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values: diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 8dc287593c7..f56a721c0c0 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -121,7 +121,7 @@ Accepts a numeric argument and returns a UInt64 number close to 10 to the power cosh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -157,7 +157,7 @@ Result: acosh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -197,7 +197,7 @@ Result: sinh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -233,7 +233,7 @@ Result: asinh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -273,7 +273,7 @@ Result: atanh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). 
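For the `atanh` argument range noted above, a quick sanity-check query (the result is approximate, subject to floating-point rounding):

``` sql
-- atanh(0.9) ≈ 1.4722, i.e. the value whose hyperbolic tangent is 0.9.
SELECT atanh(0.9);
```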
@@ -309,7 +309,7 @@ The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the atan2(y, x) ``` -**Parameters** +**Arguments** - `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -346,7 +346,7 @@ Calculates the length of the hypotenuse of a right-angle triangle. The [function hypot(x, y) ``` -**Parameters** +**Arguments** - `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -383,7 +383,7 @@ Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_loga log1p(x) ``` -**Parameters** +**Arguments** - `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -423,7 +423,7 @@ The `sign` function can extract the sign of a real number. sign(x) ``` -**Parameters** +**Arguments** - `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 08d34770f57..04e921b5c55 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -19,7 +19,7 @@ Gets a named value from the [macros](../../operations/server-configuration-param getMacro(name); ``` -**Parameters** +**Arguments** - `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string). @@ -108,7 +108,7 @@ Extracts the trailing part of a string after the last slash or backslash. This f basename( expr ) ``` -**Parameters** +**Arguments** - `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value. @@ -182,13 +182,102 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. -## byteSize(...) {#function-bytesize} +## byteSize {#function-bytesize} -Get an estimate of uncompressed byte size of its arguments in memory. -E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length). -The function can take multiple arguments. The typical application is byteSize(*). +Returns estimation of uncompressed byte size of its arguments in memory. -Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis. +**Syntax** + +```sql +byteSize(argument [, ...]) +``` + +**Arguments** + +- `argument` — Value. + +**Returned value** + +- Estimation of byte size of the arguments in memory. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +For [String](../../sql-reference/data-types/string.md) arguments the funtion returns the string length + 9 (terminating zero + length). 
+ +Query: + +```sql +SELECT byteSize('string'); +``` + +Result: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Query: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +If the function takes multiple arguments, it returns their combined byte size. + +Query: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Result: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` ## materialize(x) {#materializex} @@ -260,7 +349,7 @@ The function is intended for development, debugging and demonstration. isConstant(x) ``` -**Parameters** +**Arguments** - `x` — Expression to check. @@ -331,7 +420,7 @@ Checks whether floating point value is finite. ifNotFinite(x,y) -**Parameters** +**Arguments** - `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). - `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). @@ -371,7 +460,7 @@ Allows building a unicode-art diagram. `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. -Parameters: +**Arguments** - `x` — Size to display. - `min, max` — Integer constants. The value must fit in `Int64`. @@ -556,7 +645,7 @@ Accepts the time delta in seconds. Returns a time delta with (year, month, day, formatReadableTimeDelta(column[, maximum_unit]) ``` -**Parameters** +**Arguments** - `column` — A column with numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. @@ -641,7 +730,7 @@ The result of the function depends on the affected data blocks and the order of The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery. -**Parameters** +**Arguments** - `column` — A column name or scalar expression. - `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). @@ -820,6 +909,66 @@ WHERE diff != 1 Same as for [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. 
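To make the `runningDifferenceStartingWithFirstValue` behaviour described above concrete — the first row returns its own value, each subsequent row returns the difference from the previous row — here is a self-contained sketch (the values are arbitrary; as with `runningDifference`, it assumes the rows fit in a single data block):

``` sql
SELECT runningDifferenceStartingWithFirstValue(x) AS diff
FROM (SELECT arrayJoin([2, 4, 7]) AS x);
```

``` text
┌─diff─┐
│    2 │
│    2 │
│    3 │
└──────┘
```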
+## runningConcurrency {#runningconcurrency} + +Given a series of beginning time and ending time of events, this function calculates concurrency of the events at each of the data point, that is, the beginning time. + +!!! warning "Warning" + Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block. + +The result of the function depends on the order of data in the block. It assumes the beginning time is sorted in ascending order. + +**Syntax** + +``` sql +runningConcurrency(begin, end) +``` + +**Arguments** + +- `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). + +Note that two columns `begin` and `end` must have the same type. + +**Returned values** + +- The concurrency of events at the data point. + +Type: [UInt32](../../sql-reference/data-types/int-uint.md) + +**Example** + +Input table: + +``` text +┌───────────────begin─┬─────────────────end─┐ +│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │ +└─────────────────────┴─────────────────────┘ +``` + +Query: + +``` sql +SELECT runningConcurrency(begin, end) FROM example +``` + +Result: + +``` text +┌─runningConcurrency(begin, end)─┐ +│ 1 │ +│ 2 │ +│ 3 │ +│ 2 │ +│ 1 │ +└────────────────────────────────┘ +``` + ## MACNumToString(num) {#macnumtostringnum} Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). @@ -840,7 +989,7 @@ Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). getSizeOfEnumType(value) ``` -**Parameters:** +**Arguments:** - `value` — Value of type `Enum`. @@ -869,7 +1018,7 @@ Returns size on disk (without taking into account compression). blockSerializedSize(value[, value[, ...]]) ``` -**Parameters** +**Arguments** - `value` — Any value. @@ -901,7 +1050,7 @@ Returns the name of the class that represents the data type of the column in RAM toColumnTypeName(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -941,7 +1090,7 @@ Outputs a detailed description of data structures in RAM dumpColumnStructure(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -971,7 +1120,7 @@ Does not include default values for custom columns set by the user. defaultValueOfArgumentType(expression) ``` -**Parameters:** +**Arguments:** - `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. @@ -1013,7 +1162,7 @@ Does not include default values for custom columns set by the user. defaultValueOfTypeName(type) ``` -**Parameters:** +**Arguments:** - `type` — A string representing a type name. @@ -1055,7 +1204,7 @@ Used for internal implementation of [arrayJoin](../../sql-reference/functions/ar SELECT replicate(x, arr); ``` -**Parameters:** +**Arguments:** - `arr` — Original array. 
ClickHouse creates a new array of the same length as the original and fills it with the value `x`. - `x` — The value that the resulting array will be filled with. @@ -1188,7 +1337,7 @@ Takes state of aggregate function. Returns result of aggregation (or finalized s finalizeAggregation(state) ``` -**Parameters** +**Arguments** - `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). @@ -1292,7 +1441,7 @@ Accumulates states of an aggregate function for each row of a data block. runningAccumulate(agg_state[, grouping]); ``` -**Parameters** +**Arguments** - `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). - `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. @@ -1398,7 +1547,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st joinGet(join_storage_table_name, `value_column`, join_keys) ``` -**Parameters** +**Arguments** - `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. - `value_column` — name of the column of the table that contains required data. @@ -1502,7 +1651,7 @@ Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/AS randomPrintableASCII(length) ``` -**Parameters** +**Arguments** - `length` — Resulting string length. Positive integer. @@ -1538,7 +1687,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomString(length) ``` -**Parameters** +**Arguments** - `length` — String length. Positive integer. @@ -1586,7 +1735,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomFixedString(length); ``` -**Parameters** +**Arguments** - `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1624,7 +1773,7 @@ Generates a random string of a specified length. Result string contains valid UT randomStringUTF8(length); ``` -**Parameters** +**Arguments** - `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1696,7 +1845,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is isDecimalOverflow(d, [p]) ``` -**Parameters** +**Arguments** - `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). - `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). @@ -1733,7 +1882,7 @@ Returns number of decimal digits you need to represent the value. countDigits(x) ``` -**Parameters** +**Arguments** - `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. @@ -1792,7 +1941,7 @@ Returns [native interface](../../interfaces/tcp.md) TCP port number listened by tcpPort() ``` -**Parameters** +**Arguments** - None. 
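For the `tcpPort` hunk above, a trivial usage sketch — the value returned depends on the server configuration; 9000 is only the common default:

``` sql
SELECT tcpPort();
```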
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 68998928398..2b9846344e4 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -32,7 +32,7 @@ Produces a constant column with a random value. randConstant([x]) ``` -**Parameters** +**Arguments** - `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. @@ -81,7 +81,7 @@ fuzzBits([s], [prob]) Inverts bits of `s`, each with probability `prob`. -**Parameters** +**Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 922cf7374d7..83db1975366 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -35,7 +35,7 @@ The function returns the nearest number of the specified order. In case when giv round(expression [, decimal_places]) ``` -**Parameters:** +**Arguments:** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — An integer value. @@ -114,7 +114,7 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: roundBankers(expression [, decimal_places]) ``` -**Parameters** +**Arguments** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 25f41211b47..c70ee20f076 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -16,7 +16,7 @@ Returns an array of selected substrings. Empty substrings may be selected if the splitByChar(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -53,7 +53,7 @@ Splits a string into substrings separated by a string. It uses a constant string splitByString(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -121,7 +121,7 @@ Extracts all groups from non-overlapping substrings matched by a regular express extractAllGroups(text, regexp) ``` -**Parameters** +**Arguments** - `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
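A minimal usage sketch for `extractAllGroups`, assuming an arbitrary `key=value` input string; each regular-expression match contributes one array with its captured groups:

``` sql
SELECT extractAllGroups('abc=111, def=222', '(\\w+)=(\\w+)');
```

This should return one array per match, e.g. `[['abc','111'],['def','222']]`.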
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 2b93dd924a3..191bd100dda 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i toValidUTF8( input_string ) ``` -Parameters: +**Arguments** - input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. @@ -104,7 +104,7 @@ Repeats a string as many times as specified and concatenates the replicated valu repeat(s, n) ``` -**Parameters** +**Arguments** - `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). - `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). @@ -173,7 +173,7 @@ Concatenates the strings listed in the arguments, without a separator. concat(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -211,7 +211,7 @@ The function is named “injective” if it always returns different result for concatAssumeInjective(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -328,7 +328,7 @@ By default removes all consecutive occurrences of common whitespace (ASCII chara trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) ``` -**Parameters** +**Arguments** - `trim_character` — specified characters for trim. [String](../../sql-reference/data-types/string.md). - `input_string` — string for trim. [String](../../sql-reference/data-types/string.md). @@ -367,7 +367,7 @@ trimLeft(input_string) Alias: `ltrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -405,7 +405,7 @@ trimRight(input_string) Alias: `rtrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -443,7 +443,7 @@ trimBoth(input_string) Alias: `trim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -496,7 +496,7 @@ Replaces literals, sequences of literals and complex aliases with placeholders. normalizeQuery(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -532,7 +532,7 @@ Returns identical 64bit hash values without the values of literals for similar q normalizedQueryHash(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -570,7 +570,7 @@ The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, encodeXMLComponent(x) ``` -**Parameters** +**Arguments** - `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -600,4 +600,46 @@ Hello, "world"! 'foo' ``` +## decodeXMLComponent {#decode-xml-component} + +Replaces XML predefined entities with characters. Predefined entities are `"` `&` `'` `>` `<` +This function also replaces numeric character references with Unicode characters. Both decimal (like `✓`) and hexadecimal (`✓`) forms are supported. + +**Syntax** + +``` sql +decodeXMLComponent(x) +``` + +**Parameters** + +- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The sequence of characters after replacement. 
+ +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT decodeXMLComponent(''foo''); +SELECT decodeXMLComponent('< Σ >'); +``` + +Result: + +``` text +'foo' +< Σ > +``` + +**See Also** + +- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references) + + [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 4036974dd37..83b0edea438 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -14,8 +14,6 @@ The search is case-sensitive by default in all these functions. There are separa Returns the position (in bytes) of the found substring in the string, starting from 1. -Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. If this assumption is not met and a character can’t be represented using a single byte, the function doesn’t throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on. - For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive). **Syntax** @@ -26,7 +24,7 @@ position(haystack, needle[, start_pos]) Alias: `locate(haystack, needle[, start_pos])`. -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -97,7 +95,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -140,7 +138,7 @@ For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#p positionUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -213,7 +211,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -258,7 +256,7 @@ The search is performed on sequences of bytes without respect to string encoding multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). 
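A minimal usage sketch for `multiSearchAllPositions` (the search is case-sensitive and positions are byte-based; `0` means the needle was not found):

``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```

With the default case-sensitive behaviour this should return `[0, 13, 0]`.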
@@ -373,7 +371,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsHorizontal(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -414,7 +412,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsVertical(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -473,7 +471,7 @@ Case insensitive variant of [like](https://clickhouse.tech/docs/en/sql-reference ilike(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters. @@ -550,7 +548,7 @@ For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-re countSubstrings(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -616,7 +614,7 @@ Returns the number of substring occurrences case-insensitive. countSubstringsCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -682,7 +680,7 @@ Returns the number of substring occurrences in `UTF-8` case-insensitive. SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -734,7 +732,7 @@ Returns the number of regular expression matches for a `pattern` in a `haystack` countMatches(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). 
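A minimal usage sketch for `countMatches`, assuming an arbitrary haystack; note that the greedy pattern `o+` consumes `oo` as a single occurrence:

``` sql
SELECT countMatches('foobar.com', 'o+');
```

This should return `2`: one match for `oo` and one for the `o` in `.com`.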
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index dcbcd3e374b..1006b68b8ee 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -45,7 +45,7 @@ untuple(x) You can use the `EXCEPT` expression to skip columns as a result of the query. -**Parameters** +**Arguments** - `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a46c36395b8..2b3a9d9103f 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,68 @@ toc_title: Working with maps # Functions for maps {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type. + +**Syntax** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Arguments** + +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Data structure as `key:value` pairs. + +Type: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Examples** + +Query: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Result: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Result: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**See Also** + +- [Map(key, value)](../../sql-reference/data-types/map.md) data type + + ## mapAdd {#function-mapadd} Collect all the keys and sum corresponding values. @@ -15,7 +77,7 @@ Collect all the keys and sum corresponding values. mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -49,7 +111,7 @@ Collect all the keys and subtract corresponding values. 
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -87,7 +149,7 @@ Generates a map, where keys are a series of numbers, from minimum to maximum key The number of elements in `keys` and `values` must be the same for each row. -**Parameters** +**Arguments** - `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). @@ -112,4 +174,4 @@ Result: └──────────────────────────────┴───────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 6237cd6a976..0ea2bf0f1a6 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) - `toInt128(expr)` — Results in the `Int128` data type. - `toInt256(expr)` — Results in the `Int256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -88,7 +88,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md - `toUInt64(expr)` — Results in the `UInt64` data type. - `toUInt256(expr)` — Results in the `UInt256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -154,7 +154,7 @@ Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data- These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. 
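A minimal usage sketch contrasting a successful parse with a parsing error (the input strings are arbitrary):

``` sql
SELECT
    toDecimal32OrNull('1.111', 3) AS parsed,
    toDecimal32OrNull('not a number', 3) AS failed;
```

`parsed` should be `1.111` with type `Nullable(Decimal(9, 3))`, while `failed` should be `NULL` instead of raising an exception.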
@@ -199,7 +199,7 @@ Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/dec These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. @@ -303,81 +303,48 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut └────────────┴───────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} +## reinterpret(x, T) {#type_conversion_function-reinterpret} -## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} +Performs byte reinterpretation of ‘x’ as ‘t’ data type. -## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} - -## reinterpretAsDate {#reinterpretasdate} - -## reinterpretAsDateTime {#reinterpretasdatetime} - -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. - -## reinterpretAsString {#type_conversion_functions-reinterpretAsString} - -This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. - -## reinterpretAsFixedString {#reinterpretasfixedstring} - -This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. - -## reinterpretAsUUID {#reinterpretasuuid} - -This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. - -**Syntax** +Following reinterpretations are allowed: +1. Any type that has fixed size and value of that type can be represented continuously into FixedString. +2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +3. 
FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, ``` sql -reinterpretAsUUID(fixed_string) +SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, + reinterpret(toInt8(1), 'Float32') as int_to_float, + reinterpret('1', 'UInt32') as string_to_int; ``` -**Parameters** - -- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). - -**Returned value** - -- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). - -**Examples** - -String to UUID. - -Query: - -``` sql -SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) -``` - -Result: - ``` text -┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ -│ 08090a0b-0c0d-0e0f-0001-020304050607 │ -└───────────────────────────────────────────────────────────────────────┘ +┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐ +│ 255 │ 1e-45 │ 49 │ +└─────────────┴──────────────┴───────────────┘ ``` -Going back and forth from String to UUID. +## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256} -Query: +## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256} -``` sql -WITH - generateUUIDv4() AS uuid, - identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, - reinterpretAsUUID(reverse(unhex(str))) AS uuid2 -SELECT uuid = uuid2; -``` +## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256} -Result: +## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat} -``` text -┌─equals(uuid, uuid2)─┐ -│ 1 │ -└─────────────────────┘ -``` +## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} + +## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} + +## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64} + +## reinterpretAsString {#type_conversion_function-reinterpretAsString} + +## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString} + +## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID} + +These functions are aliases for `reinterpret` function. ## CAST(x, T) {#type_conversion_function-cast} @@ -438,7 +405,7 @@ bounds of type T. Example ``` sql -SELECT cast(-1, 'UInt8') as uint8; +SELECT cast(-1, 'UInt8') as uint8; ``` @@ -459,7 +426,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} -Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL +Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL if the casted value is not representable in the target type. Example: @@ -504,7 +471,7 @@ toIntervalQuarter(number) toIntervalYear(number) ``` -**Parameters** +**Arguments** - `number` — Duration of interval. Positive integer number. @@ -542,7 +509,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112 parseDateTimeBestEffort(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). 
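A minimal usage sketch for `parseDateTimeBestEffort` (the timestamp string is arbitrary; when no time zone argument is passed, the server time zone is used for interpretation):

``` sql
SELECT parseDateTimeBestEffort('12/12/2020 12:12:57') AS parsed;
```

This should yield the `DateTime` value `2020-12-12 12:12:57`.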
@@ -654,7 +621,7 @@ This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebestef parseDateTimeBestEffortUS(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -738,7 +705,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers toLowCardinality(expr) ``` -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -778,7 +745,7 @@ Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Inpu toUnixTimestamp64Milli(value) ``` -**Parameters** +**Arguments** - `value` — DateTime64 value with any precision. @@ -830,7 +797,7 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and fromUnixTimestamp64Milli(value [, ti]) ``` -**Parameters** +**Arguments** - `value` — `Int64` value with any precision. - `timezone` — `String` (optional) timezone name of the result. @@ -854,15 +821,15 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC') ## formatRow {#formatrow} -Converts arbitrary expressions into a string via given format. +Converts arbitrary expressions into a string via given format. -**Syntax** +**Syntax** ``` sql formatRow(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. @@ -897,13 +864,13 @@ Result: Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any. -**Syntax** +**Syntax** ``` sql formatRowNoNewline(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..9e79ef2d0cb 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -25,7 +25,7 @@ Extracts the hostname from a URL. domain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -76,7 +76,7 @@ Extracts the the top-level domain from a URL. topLevelDomain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -133,10 +133,9 @@ For example: ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} -Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if: +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. -- you need fresh TLD list, -- or you have custom. +Can be useful if you need fresh TLD list or you have custom. Configuration example: @@ -149,21 +148,150 @@ Configuration example: ``` -Example: +**Syntax** -- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`. 
+``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name. + +Can be useful if you need fresh TLD list or you have custom. + +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} -Same as `firstSignificantSubdomain` but accept custom TLD list name. +Returns the first significant subdomain. Accepts customs TLD list name. -### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} +Can be useful if you need fresh TLD list or you have custom. -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) {#port} @@ -242,7 +370,7 @@ Extracts network locality (`username:password@host:port`) from a URL. netloc(URL) ``` -**Parameters** +**Arguments** - `url` — URL. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f70532252c7..56530b5e83b 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -115,7 +115,7 @@ Finds the highest continent in the hierarchy for the region. regionToTopContinent(id[, geobase]); ``` -**Parameters** +**Arguments** - `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 0ea4d4b3dc5..16aa266ebf9 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -20,10 +20,12 @@ The following actions are supported: - [ADD COLUMN](#alter_add-column) — Adds a new column to the table. - [DROP COLUMN](#alter_drop-column) — Deletes the column. +- [RENAME COLUMN](#alter_rename-column) — Renames the column. - [CLEAR COLUMN](#alter_clear-column) — Resets column values. - [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. - [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL. - [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties. +- [RENAME COLUMN](#alter_rename-column) — Renames an existing column. These actions are described in detail below. @@ -78,6 +80,22 @@ Example: ALTER TABLE visits DROP COLUMN browser ``` +## RENAME COLUMN {#alter_rename-column} + +``` sql +RENAME COLUMN [IF EXISTS] name to new_name +``` + +Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. Since renaming does not involve the underlying data, the query is completed almost instantly. + +**NOTE**: Columns specified in the key expression of the table (either with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to change these columns will produce `SQL Error [524]`. + +Example: + +``` sql +ALTER TABLE visits RENAME COLUMN webBrowser TO browser +``` + ## CLEAR COLUMN {#alter_clear-column} ``` sql @@ -166,6 +184,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL; - [REMOVE TTL](ttl.md). +## RENAME COLUMN {#alter_rename-column} + +Renames an existing column. 
+ +Syntax: + +```sql +ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name; +``` + +**Example** + +```sql +ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new; +``` + ## Limitations {#alter-query-limitations} The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot. diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 2923fd04c4b..a43b5255598 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -12,9 +12,28 @@ Syntax: ``` sql ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [RENAME TO new_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. + +Parameters `queries`, `query_selects`, 'query_inserts', errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. + +`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). 
+ +**Examples** + +Limit the maximum number of queries for the current user with 123 queries in 15 months constraint: + +``` sql +ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: + +``` sql +ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; +``` diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 20537b98a46..71416abf588 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -11,19 +11,29 @@ Syntax: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'forwarded ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. + +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. + `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). -## Example {#create-quota-example} +**Examples** Limit the maximum number of queries for the current user with 123 queries in 15 months constraint: ``` sql -CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER +CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: + +``` sql +CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; ``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d5343cce7be..c1a52e3b864 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP_SERVER}] BY {'password'|'hash'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] @@ -30,6 +30,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH sha256_hash BY 'hash'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH ldap_server BY 'server'` ## User Host {#user-host} diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 4370735b8d9..8acd58f4338 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -41,7 +41,6 @@ SELECT a, b, c FROM (SELECT ...) CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... ``` - Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query. When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data. @@ -59,6 +58,197 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. +Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view. + Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query. There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md). + +## Live View (Experimental) {#live-view} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`. + + +```sql +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +``` + +Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. + +Live views are triggered by insert into the innermost table specified in the query. + +Live views work similarly to how a query in a distributed table works. 
But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. + +!!! info "Limitations" + - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. + - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. + - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. + - Does not work with replicated or distributed tables where inserts are performed on different nodes. + - Can't be triggered by multiple tables. + + See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. + +You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query + +```sql +WATCH [db.]live_view +``` + +**Example:** + +```sql +CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; +CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt; +``` + +Watch a live view while doing a parallel insert into the source table. + +```sql +WATCH lv +``` + +```bash +┌─sum(x)─┬─_version─┐ +│ 1 │ 1 │ +└────────┴──────────┘ +┌─sum(x)─┬─_version─┐ +│ 2 │ 2 │ +└────────┴──────────┘ +┌─sum(x)─┬─_version─┐ +│ 6 │ 3 │ +└────────┴──────────┘ +... +``` + +```sql +INSERT INTO mt VALUES (1); +INSERT INTO mt VALUES (2); +INSERT INTO mt VALUES (3); +``` + +or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events. + +```sql +WATCH [db.]live_view EVENTS +``` + +**Example:** + +```sql +WATCH lv EVENTS +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +┌─version─┐ +│ 2 │ +└─────────┘ +┌─version─┐ +│ 3 │ +└─────────┘ +... +``` + +You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables. + +```sql +SELECT * FROM [db.]live_view WHERE ... +``` + +### Force Refresh {#live-view-alter-refresh} + +You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. + +### With Timeout {#live-view-with-timeout} + +When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view. + +```sql +CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... +``` + +If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used. 
+ +**Example:** + +```sql +CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; +CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; +``` + +### With Refresh {#live-view-with-refresh} + +When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. + +```sql +CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ... +``` + +If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used. + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv +``` + +```bash +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:05 │ 1 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:10 │ 2 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:15 │ 3 │ +└─────────────────────┴──────────┘ +``` + +You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. + +```sql +CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); +``` + +After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries. + +```sql +WATCH lv +``` + +``` +Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist.. +``` + +### Usage + +Most common uses of live view tables include: + +- Providing push notifications for query result changes to avoid polling. +- Caching results of most frequent queries to provide immediate query results. +- Watching for table changes and triggering a follow-up select queries. +- Watching metrics from system tables using periodic refresh. + +### Settings {#live-view-settings} + +You can use the following settings to control the behaviour of live views. + +- `allow_experimental_live_view` - enable live views. Default is `0`. +- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds. +- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which + mergeable blocks are dropped and query is re-executed. Default is `64` inserts. +- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds. +- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds. + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 2928e50224d..c517a515ab7 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)`. 
You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). +You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). For example, consider the table: @@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum - The values calculated from the `DEFAULT` expressions specified in the table definition. - Zeros and empty strings, if `DEFAULT` expressions are not defined. -If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query. - Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query: ``` sql diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 7c13772ffdf..e99ebef838c 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -278,5 +278,4 @@ Other ways to make settings see [here](../../../operations/settings/index.md). SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) - +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md new file mode 100644 index 00000000000..761bc8a041e --- /dev/null +++ b/docs/en/sql-reference/statements/watch.md @@ -0,0 +1,106 @@ +--- +toc_priority: 53 +toc_title: WATCH +--- + +# WATCH Statement (Experimental) {#watch} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. + + +``` sql +WATCH [db.]live_view +[EVENTS] +[LIMIT n] +[FORMAT format] +``` + +The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view). + +```sql +WATCH [db.]live_view +``` + +The virtual `_version` column in the query result indicates the current result version. 
+ +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv +``` + +```bash +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:21 │ 1 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:26 │ 2 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:31 │ 3 │ +└─────────────────────┴──────────┘ +... +``` + +By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. + +```sql +INSERT INTO [db.]table WATCH [db.]live_view ... +``` + +## EVENTS Clause {#events-clause} + +The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version. + +```sql +WATCH [db.]live_view EVENTS +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv EVENTS +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +┌─version─┐ +│ 2 │ +└─────────┘ +... +``` + +## LIMIT Clause {#limit-clause} + +The `LIMIT n` clause species the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once query is evaluated. + +```sql +WATCH [db.]live_view LIMIT 1 +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv EVENTS LIMIT 1 +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +``` + +## FORMAT Clause {#format-clause} + +The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause). + +!!! info "Note" + The [JSONEachRowWithProgress](../../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. + diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 5bbd22dfe4e..be6ba2b8bc4 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -13,7 +13,7 @@ Supports all data types that can be stored in table except `LowCardinality` and generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]); ``` -**Parameters** +**Arguments** - `name` — Name of corresponding column. - `TypeName` — Type of corresponding column. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index eec4a1d0c46..14cd4369285 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -13,7 +13,7 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) ``` -**Parameters** +**Arguments** - `host:port` — MySQL server address. 
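A hypothetical invocation sketch for the `mysql` table function; the host, database, table and credentials below are placeholders and must be replaced with real connection details:

``` sql
SELECT *
FROM mysql('mysql-host:3306', 'test_db', 'test_table', 'mysql_user', 'mysql_password')
LIMIT 10;
```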
diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 9997971af65..08096c2b019 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -13,7 +13,7 @@ Turns a subquery into a table. The function implements views (see [CREATE VIEW](
view(subquery)
```

-**Parameters**
+**Arguments**

- `subquery` — `SELECT` query.
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
new file mode 100644
index 00000000000..cbf03a44d46
--- /dev/null
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -0,0 +1,60 @@
+---
+toc_priority: 62
+toc_title: Window Functions
+---
+
+# [experimental] Window Functions
+
+!!! warning "Warning"
+    This is an experimental feature that is currently in development and is not ready
+    for general use. It will change in unpredictable backwards-incompatible ways in
+    future releases. Set `allow_experimental_window_functions = 1` to enable it.
+
+ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
+
+| Feature | Support or workaround |
+| --------| ----------|
+| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
+| expressions involving window functions, e.g. `(count(*) over ()) / 2` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
+| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
+| `ROWS` frame | supported |
+| `RANGE` frame | supported, the default |
+| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
+| `GROUPS` frame | not supported |
+| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
+| `rank()`, `dense_rank()`, `row_number()` | supported |
+| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
+
+## References
+
+### GitHub Issues
+
+The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
+
+All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
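To make the feature table above concrete, here is a minimal, hedged sketch of the supported grammar. It assumes a hypothetical table `orders(id UInt64, time DateTime, value Float64)` and is illustrative only, not taken from the ClickHouse documentation:

```sql
SET allow_experimental_window_functions = 1;

SELECT
    id,
    -- ad hoc window specification with an explicit ROWS frame (running total per id)
    sum(value) OVER (PARTITION BY id ORDER BY time ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total,
    -- workaround for lag(value, 1), as suggested in the table above
    any(value) OVER (PARTITION BY id ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS prev_value,
    -- named window defined in the WINDOW clause
    row_number() OVER w AS rn
FROM orders
WINDOW w AS (PARTITION BY id ORDER BY time);
```

Since `RANGE` is the default frame, the `ROWS` frames are spelled out explicitly; an expression built on top of a window function, such as `(count(*) over ()) / 2`, would need a wrapping subquery, as noted in the table.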
+ +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index 5975edd3c6c..bf4e4fb0fcc 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -5,6 +5,22 @@ toc_title: '2020' ### ClickHouse release 20.12 +### ClickHouse release v20.12.5.14-stable, 2020-12-28 + +#### Bug Fix + +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement + +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + + ### ClickHouse release v20.12.4.5-stable, 2020-12-24 #### Bug Fix @@ -142,6 +158,70 @@ toc_title: '2020' ## ClickHouse release 20.11 +### ClickHouse release v20.11.7.16-stable, 2021-03-02 + +#### Improvement + +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). 
[#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). 
[#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). 
+* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. 
This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). + +#### Build/Testing/Packaging Improvement + +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + + + ### ClickHouse release v20.11.6.6-stable, 2020-12-24 #### Bug Fix @@ -588,6 +668,60 @@ toc_title: '2020' ## ClickHouse release 20.9 +### ClickHouse release v20.9.7.11-stable, 2020-12-07 + +#### Performance Improvement + +* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix + +* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). +* Fixed segfault when there is not enough space when inserting into `Distributed` table. 
[#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)). +* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)). +* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)). +* Bug fix for funciton fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). 
+* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)). +* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). + +#### Build/Testing/Packaging Improvement + +* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + + +### ClickHouse release v20.9.6.14-stable, 2020-11-20 + +#### Improvement + +* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). +* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) should return `NULL` when miss rows and use nullable arguments. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)). + +#### Bug Fix + +* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). +* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. 
[#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). + + ### ClickHouse release v20.9.5.5-stable, 2020-11-13 #### Bug Fix @@ -744,6 +878,23 @@ toc_title: '2020' ## ClickHouse release 20.8 +### ClickHouse release v20.8.12.2-lts, 2021-01-16 + +#### Bug Fix + +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). + + +### ClickHouse release v20.8.11.17-lts, 2020-12-25 + +#### Bug Fix + +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.8.10.13-lts, 2020-12-24 #### Bug Fix diff --git a/docs/es/operations/backup.md b/docs/es/operations/backup.md index a6297070663..be33851574a 100644 --- a/docs/es/operations/backup.md +++ b/docs/es/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Copia de seguridad de datos # Copia de seguridad de datos {#data-backup} -Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. 
+Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](server-configuration-parameters/settings.md#max-table-size-to-drop). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**. diff --git a/docs/fr/operations/backup.md b/docs/fr/operations/backup.md index 9a463372947..953a96a04eb 100644 --- a/docs/fr/operations/backup.md +++ b/docs/fr/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "La Sauvegarde Des Donn\xE9es" # La Sauvegarde Des Données {#data-backup} -Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. +Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](server-configuration-parameters/settings.md#max-table-size-to-drop). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. Afin d'atténuer efficacement les erreurs humaines possibles, vous devez préparer soigneusement une stratégie de sauvegarde et de restauration de vos données **préalablement**. 
diff --git a/docs/ja/operations/backup.md b/docs/ja/operations/backup.md index 994271371a4..b0cde00e23c 100644 --- a/docs/ja/operations/backup.md +++ b/docs/ja/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u30C7\u30FC\u30BF\u30D0\u30C3\u30AF\u30A2" # データバックア {#data-backup} -ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). しかし、これらの保障措置がカバーしないすべてのケースで回避. +ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](server-configuration-parameters/settings.md#max-table-size-to-drop). しかし、これらの保障措置がカバーしないすべてのケースで回避. ヒューマンエラーを効果的に軽減するには、データのバックアップと復元のための戦略を慎重に準備する必要があります **事前に**. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 4d71dca46a7..1b211259bbb 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -714,6 +714,7 @@ auto s = std::string{"Hello"}; ### Пользовательская ошибка {#error-messages-user-error} Такая ошибка вызвана действиями пользователя (неверный синтаксис запроса) или конфигурацией внешних систем (кончилось место на диске). Предполагается, что пользователь может устранить её самостоятельно. Для этого в сообщении об ошибке должна содержаться следующая информация: + * что произошло. Это должно объясняться в пользовательских терминах (`Function pow() is not supported for data type UInt128`), а не загадочными конструкциями из кода (`runtime overload resolution failed in DB::BinaryOperationBuilder::Impl, UInt128, Int8>::kaboongleFastPath()`). * почему/где/когда -- любой контекст, который помогает отладить проблему. Представьте, как бы её отлаживали вы (программировать и пользоваться отладчиком нельзя). * что можно предпринять для устранения ошибки. Здесь можно перечислить типичные причины проблемы, настройки, влияющие на это поведение, и так далее. diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index f23ac0cddd6..3022542e294 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`. - Репликация может быть легко нарушена. - Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены. +- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). 
Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`. ## Примеры использования {#examples-of-use} @@ -156,4 +157,4 @@ SELECT * FROM mysql.test; └───┴─────┴──────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..9b68bcfc770 --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,44 @@ +--- +toc_priority: 6 +toc_title: EmbeddedRocksDB +--- + +# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine} + +Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/). + +## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB +PRIMARY KEY(primary_key_name); +``` + +Обязательные параметры: + +- `primary_key_name` может быть любое имя столбца из списка столбцов. +- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`. +- Поддерживается только один столбец в первичном ключе. +- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке. +- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`. + +Пример: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32, +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key; +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) \ No newline at end of file diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md index 02189cf9e55..db7e527442e 100644 --- a/docs/ru/engines/table-engines/integrations/index.md +++ b/docs/ru/engines/table-engines/integrations/index.md @@ -12,7 +12,10 @@ toc_priority: 30 - [ODBC](../../../engines/table-engines/integrations/odbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md new file mode 100644 index 00000000000..0765b3909de --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -0,0 +1,57 @@ +--- +toc_priority: 7 +toc_title: MongoDB +--- + +# MongoDB {#mongodb} + +Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживается. 
+ +## Создание таблицы {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name +( + name1 [type1], + name2 [type2], + ... +) ENGINE = MongoDB(host:port, database, collection, user, password); +``` + +**Параметры движка** + +- `host:port` — адрес сервера MongoDB. + +- `database` — имя базы данных на удалённом сервере. + +- `collection` — имя коллекции на удалённом сервере. + +- `user` — пользователь MongoDB. + +- `password` — пароль пользователя. + +## Примеры использования {#usage-example} + +Таблица в ClickHouse для чтения данных из колекции MongoDB: + +``` text +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); +``` + +Запрос к таблице: + +``` sql +SELECT COUNT() FROM mongo_table; +``` + +``` text +┌─count()─┐ +│ 4 │ +└─────────┘ +``` + +[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index dedb5842d68..f55163c1988 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -52,10 +52,26 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Требуемая конфигурация: +Настройки форматов данных также могут быть добавлены в списке RabbitMQ настроек. + +Example: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse. +Требуемая конфигурация: + ``` xml root @@ -63,16 +79,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` -Example: +Дополнительная конфигурация: -``` sql - CREATE TABLE queue ( - key UInt64, - value UInt64 - ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', - rabbitmq_exchange_name = 'exchange1', - rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; +``` xml + + clickhouse + ``` ## Описание {#description} @@ -98,6 +110,7 @@ Example: - `consistent_hash` - данные равномерно распределяются между всеми связанными таблицами, где имя точки обмена совпадает. Обратите внимание, что этот тип обмена должен быть включен с помощью плагина RabbitMQ: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Настройка `rabbitmq_queue_base` может быть использована в следующих случаях: + 1. чтобы восстановить чтение из ранее созданных очередей, если оно прекратилось по какой-либо причине, но очереди остались непустыми. Для восстановления чтения из одной конкретной очереди, нужно написать ее имя в `rabbitmq_queue_base` настройку и не указывать настройки `rabbitmq_num_consumers` и `rabbitmq_num_queues`. Чтобы восстановить чтение из всех очередей, которые были созданы для конкретной таблицы, необходимо совпадение следующих настроек: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. По умолчанию, если настройка `rabbitmq_queue_base` не указана, будут использованы уникальные для каждой таблицы имена очередей. 2. чтобы объявить одни и те же очереди для разных таблиц, что позволяет создавать несколько параллельных подписчиков на каждую из очередей. 
То есть обеспечивается лучшая производительность. В данном случае, для таких таблиц также необходимо совпадение настроек: `rabbitmq_num_consumers`, `rabbitmq_num_queues`. 3. чтобы повторно использовать созданные c `durable` настройкой очереди, так как они не удаляются автоматически (но могут быть удалены с помощью любого RabbitMQ CLI). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 9b2a5eafca3..6fc566b7c31 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -37,7 +37,10 @@ ORDER BY expr [PARTITION BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] [SETTINGS name=value, ...] ``` @@ -71,7 +74,7 @@ ORDER BY expr Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`. - Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. + Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` указывает действие, которое будет выполнено с частью: удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`), или агрегирование данных в устаревших строках. Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl) @@ -91,6 +94,7 @@ ORDER BY expr - `max_parts_in_total` — максимальное количество кусков во всех партициях. - `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. - `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. + - `max_partitions_to_read` — Ограничивает максимальное число партиций для чтения в одном запросе. Также возможно указать настройку [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) в глобальных настройках. **Пример задания секций** @@ -443,16 +447,28 @@ ALTER TABLE example_table Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). 
Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки. ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ``` За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения: - `DELETE` - удалить данные (действие по умолчанию); - `TO DISK 'aaa'` - переместить данные на диск `aaa`; -- `TO VOLUME 'bbb'` - переместить данные на том `bbb`. +- `TO VOLUME 'bbb'` - переместить данные на том `bbb`; +- `GROUP BY` - агрегировать данные. -Примеры: +В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо). + +Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы. + +Если колонка не является частью выражения `GROUP BY` и не задается напрямую в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк (как будто к ней применяется агрегирующая функция `any`). + +**Примеры** + +Создание таблицы с TTL: ``` sql CREATE TABLE example_table @@ -468,13 +484,43 @@ TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Изменение TTL +Изменение TTL: ``` sql ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +Создание таблицы, в которой строки устаревают через месяц. Устаревшие строки удаляются, если дата выпадает на понедельник: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгуппированных строк. + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY k1, k2 +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **Удаление данных** Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных. @@ -666,4 +712,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства. 
-[Оригинальная статья](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/) diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 0dcb6fd307d..165b54d9b62 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -5,7 +5,7 @@ toc_title: "\u0420\u0435\u0437\u0435\u0440\u0432\u043d\u043e\u0435\u0020\u043a\u # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh} -[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. +[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](server-configuration-parameters/settings.md#max-table-size-to-drop). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. Для того чтобы эффективно уменьшить возможные человеческие ошибки, следует тщательно подготовить стратегию резервного копирования и восстановления данных **заранее**. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index e78d4c98683..bfc0b0a2644 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска). -{## [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##} +## max_partitions_to_read {#max-partitions-to-read} + +Ограничивает максимальное число партиций для чтения в одном запросе. + +Указанное при создании таблицы значение настройки может быть переназначено настройкой на уровне запроса. + +Возможные значения: + +- Любое положительное целое число. + +Значение по умолчанию: -1 (неограниченно). 
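As a brief, hypothetical sketch of the two levels mentioned above (the table name `events` and the values are made up for illustration), the limit set in the table's `SETTINGS` clause can be overridden per query:

```sql
-- Table-level limit, set when the table is created.
CREATE TABLE events
(
    d Date,
    x UInt64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY x
SETTINGS max_partitions_to_read = 10;

-- Query-level setting overriding the table-level value.
SELECT count() FROM events SETTINGS max_partitions_to_read = 2;
```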
+ +[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 73dc0b9d944..7322b6c9184 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -283,12 +283,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input_format_tsv_empty_as_default {#settings-input-format-tsv-empty-as-default} -Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. +Если эта настройка включена, все пустые поля во входящем TSV заменяются значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. По умолчанию отключена. -Disabled by default. - ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV. @@ -406,21 +404,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; Возможные значения: -- `'best_effort'` — включает расширенный парсинг. +- `best_effort` — включает расширенный парсинг. -ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`. +ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`. -- `'basic'` — используется базовый парсер. +- `basic` — используется базовый парсер. -ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`. +ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`. -Значение по умолчанию: `'basic'`. +Значение по умолчанию: `basic`. См. также: - [Тип данных DateTime.](../../sql-reference/data-types/datetime.md) - [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) +## date_time_output_format {#settings-date_time_output_format} + +Позволяет выбрать разные выходные форматы текстового представления даты и времени. + +Возможные значения: + +- `simple` - простой выходной формат. + + Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера. + +- `iso` - выходной формат ISO. + + Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC). + +- `unix_timestamp` - выходной формат Unix. + + Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`. + +Значение по умолчанию: `simple`. + +См. 
также: + +- [Тип данных DateTime](../../sql-reference/data-types/datetime.md) +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) + ## join_default_strictness {#settings-join_default_strictness} Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join). @@ -683,7 +706,7 @@ ClickHouse использует этот параметр при чтении д Установка логирования запроса. -Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). +Запросы, переданные в ClickHouse с этой настройкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Пример: @@ -1496,7 +1519,7 @@ ClickHouse генерирует исключение - Тип: секунды - Значение по умолчанию: 60 секунд -Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed_replica_error_half_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки. +Управляет скоростью обнуления счетчика ошибок в распределенных таблицах. Предположим, реплика остается недоступна в течение какого-то времени, и за этот период накопилось 5 ошибок. Если настройка `distributed_replica_error_half_life` установлена в значение 1 секунда, то реплика снова будет считаться доступной через 3 секунды после последней ошибки. См. также: @@ -1648,7 +1671,7 @@ ClickHouse генерирует исключение - Тип: bool - Значение по умолчанию: True -Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. +Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} @@ -1962,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset} -Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII. +Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII. **Пример** @@ -2448,4 +2471,70 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; Значение по умолчанию: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки). + +Возможные значения: + +- 0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки). +- Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов. +- 1 — трассировка для всех выполненных запросов включена. + +Значение по умолчанию: `0`. 
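A one-line sketch (not part of the original page) of enabling sampled tracing as described above:

```sql
-- Start a trace for roughly 10% of queries that arrive without an incoming trace context.
SET opentelemetry_start_trace_probability = 0.1;
```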
+ +## optimize_on_insert {#optimize-on-insert} + +Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы). + +Возможные значения: + +- 0 — выключена +- 1 — включена. + +Значение по умолчанию: 1. + +**Пример** + +Сравните добавление данных при включенной и выключенной настройке: + +Запрос: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Результат: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md). + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/distributed_ddl_queue.md b/docs/ru/operations/system-tables/distributed_ddl_queue.md new file mode 100644 index 00000000000..058ed06f639 --- /dev/null +++ b/docs/ru/operations/system-tables/distributed_ddl_queue.md @@ -0,0 +1,65 @@ +# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} + +Содержит информацию о [распределенных ddl запросах (секция ON CLUSTER)](../../sql-reference/distributed-ddl.md), которые были выполнены на кластере. + +Столбцы: + +- `entry` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — имя хоста. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP-адрес хоста. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — состояние запроса. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера. +- `query` ([String](../../sql-reference/data-types/string.md)) — выполненный запрос. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper). 
+ +**Пример** + +``` sql +SELECT * +FROM system.distributed_ddl_queue +WHERE cluster = 'test_cluster' +LIMIT 2 +FORMAT Vertical + +Query id: f544e72a-6641-43f1-836b-24baa1c9632a + +Row 1: +────── +entry: query-0000000000 +host_name: clickhouse01 +host_address: 172.23.0.11 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +Row 2: +────── +entry: query-0000000000 +host_name: clickhouse02 +host_address: 172.23.0.12 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +2 rows in set. Elapsed: 0.025 sec. +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) + \ No newline at end of file diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..96555064b0e --- /dev/null +++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,49 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов. + +Столбцы: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах). + +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`. + +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/). + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`. 
+ +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log) diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md index 255ece76ee2..bba4fda6135 100644 --- a/docs/ru/operations/system-tables/part_log.md +++ b/docs/ru/operations/system-tables/part_log.md @@ -6,29 +6,62 @@ Столбцы: -- `event_type` (Enum) — тип события. Столбец может содержать одно из следующих значений: +- `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса `INSERT`, создавшего этот кусок. +- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений: - `NEW_PART` — вставка нового куска. - `MERGE_PARTS` — слияние кусков. - `DOWNLOAD_PART` — загрузка с реплики. - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MUTATE_PART` — изменение куска. - `MOVE_PART` — перемещение куска между дисками. -- `event_date` (Date) — дата события. -- `event_time` (DateTime) — время события. -- `duration_ms` (UInt64) — длительность. -- `database` (String) — имя базы данных, в которой находится кусок. -- `table` (String) — имя таблицы, в которой находится кусок. -- `part_name` (String) — имя куска. -- `partition_id` (String) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`. -- `rows` (UInt64) — число строк в куске. -- `size_in_bytes` (UInt64) — размер куска данных в байтах. -- `merged_from` (Array(String)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). -- `bytes_uncompressed` (UInt64) — количество прочитанных разжатых байт. -- `read_rows` (UInt64) — сколько было прочитано строк при слиянии кусков. -- `read_bytes` (UInt64) — сколько было прочитано байт при слиянии кусков. -- `error` (UInt16) — код ошибки, возникшей при текущем событии. -- `exception` (String) — текст ошибки. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — длительность. +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок. +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение `all`, если таблица партициируется по выражению `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных. 
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных не сжатых байт. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — код ошибки, возникшей при текущем событии. +- `exception` ([String](../../sql-reference/data-types/string.md)) — текст ошибки. Системная таблица `system.part_log` будет создана после первой вставки данных в таблицу `MergeTree`. +**Пример** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 97edd5773c8..f44e65831a9 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -4,8 +4,63 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Синтаксис: `argMax(arg, val)` +Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений. +Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) +**Синтаксис** + +``` sql +argMax(arg, val) +``` + +или + +``` sql +argMax(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее максимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). 
+ +**Пример** + +Исходная таблица: + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +Запрос: + +``` sql +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; +``` + +Результат: + +``` text +┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 58161cd226a..8c25b79f92a 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -4,11 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Синтаксис: `argMin(arg, val)` +Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений. +Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -**Пример:** +**Синтаксис** + +``` sql +argMin(arg, val) +``` + +или + +``` sql +argMin(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее минимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Пример** + +Исходная таблица: ``` text ┌─user─────┬─salary─┐ @@ -18,14 +49,18 @@ toc_priority: 105 └──────────┴────────┘ ``` +Запрос: + ``` sql -SELECT argMin(user, salary) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Результат: + ``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..a4647ecfb34 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,72 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Вычисляет U-критерий Манна — Уитни для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. 
+Проверяется нулевая гипотеза, что генеральные совокупности стохастически равны. Наряду с двусторонней гипотезой могут быть проверены и односторонние. +Для применения U-критерия Манна — Уитни закон распределения генеральных совокупностей не обязан быть нормальным. + +**Параметры** + +- `alternative` — альтернативная гипотеза. (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` - если не 0, то при вычислении p-значения применяется коррекция непрерывности. (Необязательный параметр, по умолчанию: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Результат: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [U-критерий Манна — Уитни](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..77378de95d1 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Вычисляет t-критерий Стьюдента для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Стьюдента распределение в генеральных совокупностях должно быть нормальным и дисперсии должны совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Стьюдента. 
[Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Результат: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Стьюдента](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0) +- [welchTTest](welchttest.md#welchttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..16c122d1b49 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Вычисляет t-критерий Уэлча для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Уэлча распределение в генеральных совокупностях должно быть нормальным. Дисперсии могут не совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). 
+ + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Результат: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Уэлча](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0) +- [studentTTest](studentttest.md#studentttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/ru/sql-reference/data-types/array.md b/docs/ru/sql-reference/data-types/array.md index 906246b66ee..86a23ed041b 100644 --- a/docs/ru/sql-reference/data-types/array.md +++ b/docs/ru/sql-reference/data-types/array.md @@ -47,6 +47,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} +Максимальный размер массива ограничен одним миллионом элементов. + При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../../sql-reference/data-types/array.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md). Если ClickHouse не смог подобрать тип данных, то он сгенерирует исключение. Это произойдёт, например, при попытке создать массив одновременно со строками и числами `SELECT array(1, 'a')`. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 9894fa2802b..ffdf83e5bd0 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,7 +27,7 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format). diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md new file mode 100644 index 00000000000..6cb8ccf1143 --- /dev/null +++ b/docs/ru/sql-reference/data-types/map.md @@ -0,0 +1,69 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +Тип данных `Map(key, value)` хранит пары `ключ:значение`. 
+ +**Параметры** +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +!!! warning "Предупреждение" + Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`. + +Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью. + +**Примеры** + +Рассмотрим таблицу: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Выборка всех значений ключа `key2`: + +```sql +SELECT a['key2'] FROM table_map; +``` +Результат: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +Если для какого-то ключа `key` в колонке с типом `Map()` нет значения, запрос возвращает нули для числовых колонок, пустые строки или пустые массивы. + +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Результат: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +**См. также** + +- функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) +- функция [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) + +[Original article](https://clickhouse.tech/docs/ru/data-types/map/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index fc4a3ac7285..f6b8b670563 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -205,8 +205,8 @@ RANGE(MIN first MAX last) Особенности алгоритма: - Если не найден `id` или для найденного `id` не найден диапазон, то возвращается значение по умолчанию для словаря. -- Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий. -- Если граница диапазона `NULL` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. +- Если есть перекрывающиеся диапазоны, то возвращается значение из любого (случайного) подходящего диапазона. +- Если граница диапазона `NULL` или некорректная дата (1900-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. Пример конфигурации: diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 015d14b9de5..80057e6f0e0 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1135,11 +1135,225 @@ SELECT Функция `arrayFirstIndex` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. 
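
A short sketch of the call shape this note describes; the arrays and predicates are invented for the example.

``` sql
-- Illustrative sketch: the lambda argument of arrayFirstIndex is mandatory.
-- The function returns the 1-based position of the first matching element, or 0 if nothing matches.
SELECT
    arrayFirstIndex(x -> x > 2, [1, 2, 3, 4]) AS first_gt_two,  -- 3
    arrayFirstIndex(x -> x > 9, [1, 2, 3, 4]) AS no_match;      -- 0
```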
-## arraySum(\[func,\] arr1, …) {#array-sum} +## arrayMin {#array-min} -Возвращает сумму значений функции `func`. Если функция не указана - просто возвращает сумму элементов массива. +Возвращает значение минимального элемента в исходном массиве. -Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. +Если передана функция `func`, возвращается минимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMin` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMin([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Минимальное значение функции (или минимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -4 │ +└─────┘ +``` + +## arrayMax {#array-max} + +Возвращает значение максимального элемента в исходном массиве. + +Если передана функция `func`, возвращается максимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMax` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMax([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Максимальное значение функции (или максимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 4 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -1 │ +└─────┘ +``` + +## arraySum {#array-sum} + +Возвращает сумму элементов в исходном массиве. + +Если передана функция `func`, возвращается сумма элементов массива, преобразованных этой функцией. + +Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arraySum([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Сумма значений функции (или сумма элементов массива). 
+ +Тип: для Decimal чисел в исходном массиве (если функция `func` была передана, то для чисел, преобразованных ею) — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 5 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 13 │ +└─────┘ +``` + +## arrayAvg {#array-avg} + +Возвращает среднее значение элементов в исходном массиве. + +Если передана функция `func`, возвращается среднее значение элементов массива, преобразованных этой функцией. + +Функция `arrayAvg` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayAvg([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Среднее значение функции (или среднее значение элементов массива). + +Тип: [Float64](../../sql-reference/data-types/float.md). + +**Примеры** + +Запрос: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Запрос: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index 724fb97c0d5..52f0a92bc9f 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -243,4 +243,81 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` +## isIPv4String {#isipv4string} + +Определяет, является ли строка адресом IPv4 или нет. Также вернет `0`, если `string` — адрес IPv6. + +**Синтаксис** + +```sql +isIPv4String(string) +``` + +**Параметры** + +- `string` — IP адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1` если `string` является адресом IPv4 , иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Определяет, является ли строка адресом IPv6 или нет. Также вернет `0`, если `string` — адрес IPv4. + +**Синтаксис** + +```sql +isIPv6String(string) +``` + +**Параметры** + +- `string` — IP адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1` если `string` является адресом IPv6 , иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). 
+ +**Примеры** + +Запрос: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 68afb3e24ce..a738ba755b1 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -183,6 +183,103 @@ SELECT visibleWidth(NULL) Получить размер блока. В ClickHouse выполнение запроса всегда идёт по блокам (наборам кусочков столбцов). Функция позволяет получить размер блока, для которого её вызвали. +## byteSize {#function-bytesize} + +Возвращает оценку в байтах размера аргументов в памяти в несжатом виде. + +**Синтаксис** + +```sql +byteSize(argument [, ...]) +``` + +**Параметры** + +- `argument` — значение. + +**Возвращаемое значение** + +- Оценка размера аргументов в памяти в байтах. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Для аргументов типа [String](../../sql-reference/data-types/string.md) функция возвращает длину строки + 9 (нуль-терминатор + длина) + +Запрос: + +```sql +SELECT byteSize('string'); +``` + +Результат: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Запрос: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +Если функция принимает несколько аргументов, то она возвращает их совокупный размер в байтах. + +Запрос: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Результат: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` + ## materialize(x) {#materializex} Превращает константу в полноценный столбец, содержащий только одно значение. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index aeb0652cc18..236583c211a 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -597,4 +597,46 @@ Hello, "world"! 'foo' ``` + +## decodeXMLComponent {#decode-xml-component} + +Заменяет символами предопределенные мнемоники XML: `"` `&` `'` `>` `<` +Также эта функция заменяет числовые ссылки соответствующими символами юникод. 
Поддерживаются десятичная (например, `✓`) и шестнадцатеричная (`✓`) формы. + +**Синтаксис** + +``` sql +decodeXMLComponent(x) +``` + +**Параметры** + +- `x` — последовательность символов. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Строка с произведенными заменами. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT decodeXMLComponent(''foo''); +SELECT decodeXMLComponent('< Σ >'); +``` + +Результат: + +``` text +'foo' +< Σ > +``` + +**Смотрите также** + +- [Мнемоники в HTML](https://ru.wikipedia.org/wiki/%D0%9C%D0%BD%D0%B5%D0%BC%D0%BE%D0%BD%D0%B8%D0%BA%D0%B8_%D0%B2_HTML) + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index e8cbb8deec4..b7193da6f33 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -13,8 +13,6 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u Возвращает позицию (в байтах) найденной подстроки в строке, начиная с 1, или 0, если подстрока не найдена. -Работает при допущении, что строка содержит набор байт, представляющий текст в однобайтовой кодировке. Если допущение не выполнено — то возвращает неопределенный результат (не кидает исключение). Если символ может быть представлен с помощью двух байтов, он будет представлен двумя байтами и так далее. - Для поиска без учета регистра используйте функцию [positionCaseInsensitive](#positioncaseinsensitive). **Синтаксис** diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index a2b25e68fe5..a36613280a1 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,66 @@ toc_title: Работа с контейнерами map # Функции для работы с контейнерами map {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Преобразовывает пары `ключ:значение` в тип данных [Map(key, value)](../../sql-reference/data-types/map.md). + +**Синтаксис** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Параметры** + +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Структура данных в виде пар `ключ:значение`. + +Тип: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Примеры** + +Запрос: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Результат: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Результат: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**См. 
также** + +- тип данных [Map(key, value)](../../sql-reference/data-types/map.md) ## mapAdd {#function-mapadd} Собирает все ключи и суммирует соответствующие значения. diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 1008e2a359c..7541e16bed4 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk') Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня). + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская "www". Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} + +Возвращает первый существенный поддомен. 
Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Первый существенный поддомен. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### port(URL[, default_port = 0]) {#port} Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md index 707f56e7cd4..0bdac1381da 100644 --- a/docs/ru/sql-reference/statements/alter/quota.md +++ b/docs/ru/sql-reference/statements/alter/quota.md @@ -5,18 +5,38 @@ toc_title: QUOTA # ALTER QUOTA {#alter-quota-statement} -Изменяет квоту. +Изменяет [квоту](../../../operations/access-rights.md#quotas-management). -## Синтаксис {#alter-quota-syntax} +Синтаксис: ``` sql ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [RENAME TO new_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/quota/) \ No newline at end of file +Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). + +Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). + +В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). 
+ +**Примеры** + +Ограничить для текущего пользователя максимальное число запросов — не более 123 запросов за каждые 15 месяцев: + +``` sql +ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов: + +``` sql +ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/quota/) diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md index fe18869bf2e..65762071ea2 100644 --- a/docs/ru/sql-reference/statements/create/quota.md +++ b/docs/ru/sql-reference/statements/create/quota.md @@ -7,23 +7,34 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430" Создает [квоту](../../../operations/access-rights.md#quotas-management), которая может быть присвоена пользователю или роли. -### Синтаксис {#create-quota-syntax} +Синтаксис: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -### Пример {#create-quota-example} +Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). -Ограничить максимальное количество запросов для текущего пользователя до 123 запросов каждые 15 месяцев: +В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). 
+ +**Примеры** + +Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев: ``` sql -CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER +CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов: + +``` sql +CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; ``` [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/quota) diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 09026874948..f4b91b5ae17 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -56,9 +56,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. +Обратите внимание, что работа материлизованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит их слияние. + Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`. Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`. -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index d83f6691f6b..0ad85ed0166 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). В качестве примера рассмотрим таблицу: @@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable - Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы. - Нули и пустые строки, если `DEFAULT` не определены. 
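
A minimal sketch of the default-filling behaviour listed above; the table name, engine and column definitions are invented for the illustration.

``` sql
-- Illustrative sketch: `b` is omitted from the column list and is filled from its DEFAULT expression;
-- `c` has no DEFAULT, so it receives an empty string.
CREATE TABLE insert_defaults_demo
(
    a Int32,
    b String DEFAULT 'unknown',
    c String
)
ENGINE = Memory;

INSERT INTO insert_defaults_demo (a) VALUES (1);

SELECT * FROM insert_defaults_demo;
-- ┌─a─┬─b───────┬─c─┐
-- │ 1 │ unknown │   │
-- └───┴─────────┴───┘
```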
-Если [strict_insert_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе. - В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде: ``` sql diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index c37e82ae0be..b0b6e80d7be 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -162,6 +162,112 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of Подробнее смотрите в разделе «Настройки». Присутствует возможность использовать внешнюю сортировку (с сохранением временных данных на диск) и внешнюю агрегацию. +## Модификаторы запроса SELECT {#select-modifiers} + +Вы можете использовать следующие модификаторы в запросах `SELECT`. + +### APPLY {#apply-modifier} + +Вызывает указанную функцию для каждой строки, возвращаемой внешним табличным выражением запроса. + +**Синтаксис:** + +``` sql +SELECT APPLY( ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i); +INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23); +SELECT * APPLY(sum) FROM columns_transformers; +``` + +``` +┌─sum(i)─┬─sum(j)─┬─sum(k)─┐ +│ 220 │ 18 │ 347 │ +└────────┴────────┴────────┘ +``` + +### EXCEPT {#except-modifier} + +Исключает из результата запроса один или несколько столбцов. + +**Синтаксис:** + +``` sql +SELECT EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * EXCEPT (i) from columns_transformers; +``` + +``` +┌──j─┬───k─┐ +│ 10 │ 324 │ +│ 8 │ 23 │ +└────┴─────┘ +``` + +### REPLACE {#replace-modifier} + +Определяет одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`. + +Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения. + +**Синтаксис:** + +``` sql +SELECT REPLACE( AS col_name) from [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * REPLACE(i + 1 AS i) from columns_transformers; +``` + +``` +┌───i─┬──j─┬───k─┐ +│ 101 │ 10 │ 324 │ +│ 121 │ 8 │ 23 │ +└─────┴────┴─────┘ +``` + +### Комбинации модификаторов {#modifier-combinations} + +Вы можете использовать каждый модификатор отдельно или комбинировать их. + +**Примеры:** + +Использование одного и того же модификатора несколько раз. + +``` sql +SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers; +``` + +``` +┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐ +│ 2 │ 3 │ +└──────────────────────────┴──────────────────────────┘ +``` + +Использование нескольких модификаторов в одном запросе. + +``` sql +SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; +``` + +``` +┌─sum(plus(i, 1))─┬─sum(k)─┐ +│ 222 │ 347 │ +└─────────────────┴────────┘ +``` + ## SETTINGS в запросе SELECT {#settings-in-select} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. 
Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. @@ -174,5 +280,4 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 8f104e3a7d8..c8e883920dd 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -118,7 +118,7 @@ for (auto & stream : streams) stream.second->finalize(); ``` -**18.** 行的某尾不应该包含空格。 +**18.** 行的末尾不应该包含空格。 **19.** 源文件应该用 UTF-8 编码。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 2fffcbe7ef3..353dd5f5bc8 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR ### 列 TTL {#mergetree-column-ttl} -当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中此列。 +当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。 `TTL`子句不能被用于主键字段。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 7a0a42fa47c..3b89da9f595 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 行 列数据类型应为 `Int8`. diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 72491bb53ff..1b1993e3ae6 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u6570\u636E\u5907\u4EFD" # 数据备份 {#data-backup} -尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 +尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](server-configuration-parameters/settings.md#max-table-size-to-drop). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 为了有效地减少可能的人为错误,您应该 **提前**准备备份和还原数据的策略. 
diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index f834ab74f5a..64625c19c6a 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1310,3 +1310,14 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; **另请参阅** - [IN 运算符中的 NULL 处理](../../sql-reference/operators/in.md#in-null-processing) + +## max_final_threads {#max-final-threads} + +设置使用[FINAL](../../sql-reference/statements/select/from.md#select-from-final) 限定符的`SELECT`查询, 在数据读取阶段的最大并发线程数。 + +可能的值: + +- 正整数。 +- 0 or 1 — 禁用。 此时`SELECT` 查询单线程执行。 + +默认值: `16`。 diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md index b66e5262df3..f7e816ccee6 100644 --- a/docs/zh/operations/system-tables/zookeeper.md +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。动物园管理员 {#system-zookeeper} 如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 +查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 要输出所有根节点的数据,write path= ‘/’. 如果在指定的路径 ‘path’ 不存在,将引发异常。 +查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。 +如果在指定的 ‘path’ 集合中有不存在的路径,将引发异常。 +它可以用来做一批ZooKeeper路径查询。 + 列: - `name` (String) — The name of the node. diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md index 436a8f433ea..2344c3e6dc0 100644 --- a/docs/zh/sql-reference/aggregate-functions/index.md +++ b/docs/zh/sql-reference/aggregate-functions/index.md @@ -1,11 +1,12 @@ --- +toc_folder_title: 聚合函数 toc_priority: 33 -toc_title: 聚合函数 +toc_title: 简介 --- # 聚合函数 {#aggregate-functions} -聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。 +聚合函数如数据库专家预期的方式 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 工作。 ClickHouse还支持: @@ -14,7 +15,7 @@ ClickHouse还支持: ## 空处理 {#null-processing} -在聚合过程中,所有 `NULL`s被跳过。 +在聚合过程中,所有 `NULL` 被跳过。 **例:** @@ -30,7 +31,7 @@ ClickHouse还支持: └───┴──────┘ ``` -比方说,你需要在总的值 `y` 列: +比方说,你需要计算 `y` 列的总数: ``` sql SELECT sum(y) FROM t_null_big @@ -40,9 +41,8 @@ SELECT sum(y) FROM t_null_big │ 7 │ └────────┘ -该 `sum` 函数解释 `NULL` 作为 `0`. 特别是,这意味着,如果函数接收输入的选择,其中所有的值 `NULL`,那么结果将是 `0`,不 `NULL`. 
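A minimal sketch of the claim above, assuming the `t_null_big` table shown earlier in this section: when `sum` receives only `NULL` values, the result is `0`, not `NULL`.

``` sql
-- Only rows where y is NULL are selected; sum() skips NULL inputs and returns 0 rather than NULL.
SELECT sum(y) FROM t_null_big WHERE y IS NULL;
```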
-现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列: +现在你可以使用 `groupArray` 函数用 `y` 列创建一个数组: ``` sql SELECT groupArray(y) FROM t_null_big @@ -54,6 +54,6 @@ SELECT groupArray(y) FROM t_null_big └───────────────┘ ``` -`groupArray` 不包括 `NULL` 在生成的数组中。 +在 `groupArray` 生成的数组中不包括 `NULL`。 [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index cf7dddb9b7e..3a224886a00 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -1,9 +1,9 @@ --- toc_priority: 36 -toc_title: 聚合函数 +toc_title: 参考手册 --- -# 聚合函数引用 {#aggregate-functions-reference} +# 参考手册 {#aggregate-functions-reference} ## count {#agg_function-count} diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index a8b49febab5..71b7cd319eb 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -25,11 +25,13 @@ toc_title: FROM - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 +现在使用 `FINAL` 修饰符 的 `SELECT` 查询启用了并发执行, 这会快一点。但是仍然存在缺陷 (见下)。 [max_final_threads](../../../operations/settings/settings.md#max-final-threads) 设置使用的最大线程数限制。 + ### 缺点 {#drawbacks} -使用的查询 `FINAL` 执行速度不如类似的查询那么快,因为: +使用的查询 `FINAL` 执行速度比类似的查询慢一点,因为: -- 查询在单个线程中执行,并在查询执行期间合并数据。 +- 在查询执行期间合并数据。 - 查询与 `FINAL` 除了读取查询中指定的列之外,还读取主键列。 **在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##} diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index ae1d16ce402..a0e2ea155ba 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -62,12 +62,12 @@ public: bool randomize_, size_t max_iterations_, double max_time_, const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, bool continue_on_errors_, - bool print_stacktrace_, const Settings & settings_) + bool reconnect_, bool print_stacktrace_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), confidence(confidence_), query_id(query_id_), - query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), + query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_), print_stacktrace(print_stacktrace_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) @@ -155,6 +155,7 @@ private: String query_id; String query_to_execute; bool continue_on_errors; + bool reconnect; bool print_stacktrace; const Settings & settings; SharedContextHolder shared_context; @@ -404,9 +405,14 @@ private: void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) { Stopwatch watch; + + Connection & connection = **connection_entries[connection_index]; + + if (reconnect) + connection.disconnect(); + 
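+        /// Editorial note (assumption, not part of the original patch): the connection is
+        /// re-established lazily by the next query, so with --reconnect every measured query
+        /// also pays the cost of connection setup and authentication.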
RemoteBlockInputStream stream( - *(*connection_entries[connection_index]), - query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) stream.setQueryId(query_id); @@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ("continue_on_errors", "continue testing even if a query fails") + ("reconnect", "establish new connection for every query") ; Settings settings; @@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["confidence"].as(), options["query_id"].as(), options["query"].as(), - options.count("continue_on_errors") > 0, + options.count("continue_on_errors"), + options.count("reconnect"), print_stacktrace, settings); return benchmark.run(); diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 06bd7d84526..3c27908741c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -932,6 +932,10 @@ private: std::cerr << "Received exception from server (version " << server_version << "):" << std::endl << "Code: " << server_exception->code() << ". " << text << std::endl; + if (is_interactive) + { + std::cerr << std::endl; + } } if (client_exception) @@ -939,6 +943,10 @@ private: fmt::print(stderr, "Error on processing query '{}':\n{}\n", full_query, client_exception->message()); + if (is_interactive) + { + fmt::print(stderr, "\n"); + } } // A debug check -- at least some exception must be set, if the error @@ -1366,7 +1374,30 @@ private: { // Probably the server is dead because we found an assertion // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server\n"); + fmt::print(stderr, "Lost connection to the server.\n"); + + // Print the changed settings because they might be needed to + // reproduce the error. + const auto & changes = context.getSettingsRef().changes(); + if (!changes.empty()) + { + fmt::print(stderr, "Changed settings: "); + for (size_t i = 0; i < changes.size(); ++i) + { + if (i) + { + fmt::print(stderr, ", "); + } + fmt::print(stderr, "{} = '{}'", changes[i].name, + toString(changes[i].value)); + } + fmt::print(stderr, "\n"); + } + else + { + fmt::print(stderr, "No changed settings.\n"); + } + return false; } @@ -1711,7 +1742,7 @@ private: } // Remember where the data ended. We use this info later to determine // where the next query begins. - parsed_insert_query->end = data_in.buffer().begin() + data_in.count(); + parsed_insert_query->end = parsed_insert_query->data + data_in.count(); } else if (!is_interactive) { @@ -1892,6 +1923,9 @@ private: switch (packet.type) { + case Protocol::Server::PartUUIDs: + return true; + case Protocol::Server::Data: if (!cancelled) onData(packet.block); diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index ae0de450a10..8d8d8daaf39 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -325,6 +325,61 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) // the generic recursion into IAST.children. } +void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame) +{ + switch (fuzz_rand() % 40) + { + case 0: + { + const auto r = fuzz_rand() % 3; + frame.type = r == 0 ? WindowFrame::FrameType::Rows + : r == 1 ? 
WindowFrame::FrameType::Range + : WindowFrame::FrameType::Groups; + break; + } + case 1: + { + const auto r = fuzz_rand() % 3; + frame.begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ? WindowFrame::BoundaryType::Current + : WindowFrame::BoundaryType::Offset; + break; + } + case 2: + { + const auto r = fuzz_rand() % 3; + frame.end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ? WindowFrame::BoundaryType::Current + : WindowFrame::BoundaryType::Offset; + break; + } + case 3: + { + frame.begin_offset = getRandomField(0).get(); + break; + } + case 4: + { + frame.end_offset = getRandomField(0).get(); + break; + } + case 5: + { + frame.begin_preceding = fuzz_rand() % 2; + break; + } + case 6: + { + frame.end_preceding = fuzz_rand() % 2; + break; + } + default: + break; + } + + frame.is_default = (frame == WindowFrame{}); +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) @@ -409,6 +464,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) auto & def = fn->window_definition->as(); fuzzColumnLikeExpressionList(def.partition_by.get()); fuzzOrderByList(def.order_by.get()); + fuzzWindowFrame(def.frame); } fuzz(fn->children); @@ -421,6 +477,23 @@ void QueryFuzzer::fuzz(ASTPtr & ast) fuzz(select->children); } + /* + * The time to fuzz the settings has not yet come. + * Apparently we don't have any infractructure to validate the values of + * the settings, and the first query with max_block_size = -1 breaks + * because of overflows here and there. + *//* + * else if (auto * set = typeid_cast(ast.get())) + * { + * for (auto & c : set->changes) + * { + * if (fuzz_rand() % 50 == 0) + * { + * c.value = fuzzField(c.value); + * } + * } + * } + */ else if (auto * literal = typeid_cast(ast.get())) { // There is a caveat with fuzzing the children: many ASTs also keep the diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h index e9d3f150283..38714205967 100644 --- a/programs/client/QueryFuzzer.h +++ b/programs/client/QueryFuzzer.h @@ -14,6 +14,7 @@ namespace DB class ASTExpressionList; class ASTOrderByElement; +struct WindowFrame; /* * This is an AST-based query fuzzer that makes random modifications to query @@ -65,6 +66,7 @@ struct QueryFuzzer void fuzzOrderByElement(ASTOrderByElement * elem); void fuzzOrderByList(IAST * ast); void fuzzColumnLikeExpressionList(IAST * ast); + void fuzzWindowFrame(WindowFrame & frame); void fuzz(ASTs & asts); void fuzz(ASTPtr & ast); void collectFuzzInfoMain(const ASTPtr ast); diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 87083c2c27b..dfa7048349e 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -1,5 +1,6 @@ #include "Suggest.h" +#include #include #include @@ -86,6 +87,9 @@ Suggest::Suggest() void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) { + /// NOTE: Once you will update the completion list, + /// do not forget to update 01676_clickhouse_client_autocomplete.sh + std::stringstream query; // STYLE_CHECK_ALLOW_STD_STRING_STREAM query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" "SELECT name FROM system.functions" @@ -104,6 +108,18 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo " UNION ALL " "SELECT cluster FROM system.clusters" " UNION ALL " + "SELECT name FROM system.errors" + " UNION ALL " + "SELECT event FROM system.events" + " UNION ALL " + "SELECT metric FROM system.asynchronous_metrics" + " UNION ALL " + "SELECT metric 
FROM system.metrics" + " UNION ALL " + "SELECT macro FROM system.macros" + " UNION ALL " + "SELECT policy_name FROM system.storage_policies" + " UNION ALL " "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate"; /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero. @@ -123,12 +139,17 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo query << ") WHERE notEmpty(res)"; - fetch(connection, timeouts, query.str()); + Settings settings; + /// To show all rows from: + /// - system.errors + /// - system.events + settings.system_events_show_zero_values = true; + fetch(connection, timeouts, query.str(), settings); } -void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) +void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings) { - connection.sendQuery(timeouts, query); + connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings); while (true) { diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index 03332088cbe..0049bc08ebf 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -33,7 +33,7 @@ public: private: void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); - void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings); void fillWordsFromBlock(const Block & block); /// Words are fetched asynchronously. diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index c073ab38aea..66e7afd8f8c 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -29,4 +29,25 @@ {display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 {display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 + + diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index ca09e7c1889..7eea23160b2 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts) } } - /// Delete helping tables in both cases (whole table is done or not) - dropHelpingTables(task_table); - if (!table_is_done) { throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution", @@ -642,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) + ((partition_name == "'all'") ? 
" PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;"; - LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string); + LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string); UInt64 num_nodes = executeQueryOnCluster( task_table.cluster_push, @@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab { LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); } + else + { + /// Delete helping tables in case that whole table is done + dropHelpingTables(task_table); + } return table_is_done; } diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index ee4daa3e16d..14fa734f246 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -4,14 +4,14 @@ # include # include -# include +# include # include # include # include # include # include # include -# include +# include # include # include # include @@ -59,16 +59,16 @@ namespace } } -void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -159,8 +159,16 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques columns.emplace_back(reinterpret_cast(column_name), std::move(column_type)); } - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeStringBinary(columns.toString(), out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeStringBinary(columns.toString(), out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) { diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 04b4c06693b..9b5b470b31d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -3,10 +3,11 @@ #if USE_ODBC # include -# include -# include +# include # include +# include + /** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0". * TODO: It would be much better to utilize ODBC methods dedicated for columns description. * If there is no such table, an exception is thrown. 
@@ -14,7 +15,7 @@ namespace DB { -class ODBCColumnsInfoHandler : public Poco::Net::HTTPRequestHandler +class ODBCColumnsInfoHandler : public HTTPRequestHandler { public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, Context & context_) @@ -22,7 +23,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index 0cc40480b87..9ac48af4ace 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -7,39 +7,40 @@ namespace DB { -Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request) + +std::unique_ptr HandlerFactory::createRequestHandler(const HTTPServerRequest & request) { Poco::URI uri{request.getURI()}; LOG_TRACE(log, "Request URI: {}", uri.toString()); if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET) - return new PingHandler(keep_alive_timeout); + return std::make_unique(keep_alive_timeout); if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { if (uri.getPath() == "/columns_info") #if USE_ODBC - return new ODBCColumnsInfoHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/identifier_quote") #if USE_ODBC - return new IdentifierQuoteHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/schema_allowed") #if USE_ODBC - return new SchemaAllowedHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/write") - return new ODBCHandler(pool_map, keep_alive_timeout, context, "write"); + return std::make_unique(pool_map, keep_alive_timeout, context, "write"); else - return new ODBCHandler(pool_map, keep_alive_timeout, context, "read"); + return std::make_unique(pool_map, keep_alive_timeout, context, "read"); } return nullptr; } diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 1d4edfc9dd1..5dce6f02ecd 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -1,16 +1,17 @@ #pragma once + #include -#include -#include -#include -#include "MainHandler.h" +#include #include "ColumnInfoHandler.h" #include "IdentifierQuoteHandler.h" +#include "MainHandler.h" #include "SchemaAllowedHandler.h" +#include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop @@ -19,7 +20,7 @@ namespace DB /** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers. 
* Also stores Session pools for ODBC connections */ -class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +class HandlerFactory : public HTTPRequestHandlerFactory { public: HandlerFactory(const std::string & name_, size_t keep_alive_timeout_, Context & context_) @@ -28,7 +29,7 @@ public: pool_map = std::make_shared(); } - Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override; + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 2c3701cfff9..5060d37c479 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -3,14 +3,14 @@ #if USE_ODBC # include -# include +# include +# include # include # include # include # include # include # include -# include # include # include # include @@ -22,16 +22,16 @@ namespace DB { -void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -49,8 +49,16 @@ void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & reques auto identifier = getIdentifierQuote(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeStringBinary(identifier, out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeStringBinary(identifier, out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) 
{ diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index fd357e32786..dad88c72ad8 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -1,8 +1,9 @@ #pragma once #include +#include + #include -#include #if USE_ODBC @@ -10,7 +11,7 @@ namespace DB { -class IdentifierQuoteHandler : public Poco::Net::HTTPRequestHandler +class IdentifierQuoteHandler : public HTTPRequestHandler { public: IdentifierQuoteHandler(size_t keep_alive_timeout_, Context &) @@ -18,7 +19,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 64cb7bc0b46..4fcc9deea6a 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -73,19 +74,19 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str) return pool_map->at(connection_str); } -void ODBCHandler::processError(Poco::Net::HTTPServerResponse & response, const std::string & message) +void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message) { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); } -void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request); + HTMLForm params(request); if (mode == "read") - params.read(request.stream()); + params.read(request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); if (mode == "read" && !params.has("query")) @@ -136,7 +137,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne std::string connection_string = params.get("connection_string"); LOG_TRACE(log, "Connection string: '{}'", connection_string); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try { @@ -163,9 +164,8 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne #endif auto pool = getPool(connection_string); - ReadBufferFromIStream read_buf(request.stream()); - auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, - context, max_block_size); + auto & read_buf = request.getStream(); + auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, context, max_block_size); auto input_stream = std::make_shared(input_format); ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style); copyData(*input_stream, output_stream); @@ -187,9 +187,27 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne auto message = 
getCurrentExceptionMessage(true); response.setStatusAndReason( Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending - writeStringBinary(message, out); - tryLogCurrentException(log); + try + { + writeStringBinary(message, out); + out.finalize(); + } + catch (...) + { + tryLogCurrentException(log); + } + + tryLogCurrentException(log); + } + + try + { + out.finalize(); + } + catch (...) + { + tryLogCurrentException(log); } } diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index ec5e6693a60..e237ede5814 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -1,12 +1,13 @@ #pragma once #include +#include + #include -#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop namespace DB @@ -16,7 +17,7 @@ namespace DB * and also query in request body * response in RowBinary format */ -class ODBCHandler : public Poco::Net::HTTPRequestHandler +class ODBCHandler : public HTTPRequestHandler { public: using PoolPtr = std::shared_ptr; @@ -34,7 +35,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; @@ -47,7 +48,7 @@ private: static inline std::mutex mutex; PoolPtr getPool(const std::string & connection_str); - void processError(Poco::Net::HTTPServerResponse & response, const std::string & message); + void processError(HTTPServerResponse & response, const std::string & message); }; } diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 9deefaf7895..8869a2639c1 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -11,7 +11,6 @@ # include #endif -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include namespace DB @@ -212,8 +212,12 @@ int ODBCBridge::main(const std::vector & /*args*/) SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } - auto server = Poco::Net::HTTPServer( - new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params); + auto server = HTTPServer( + context, + std::make_shared("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), + server_pool, + socket, + http_params); server.start(); LOG_INFO(log, "Listening http://{}", address.toString()); diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp index b0313e46bf3..e3ab5e5cd00 100644 --- a/programs/odbc-bridge/PingHandler.cpp +++ b/programs/odbc-bridge/PingHandler.cpp @@ -6,7 +6,7 @@ namespace DB { -void PingHandler::handleRequest(Poco::Net::HTTPServerRequest & /*request*/, Poco::Net::HTTPServerResponse & response) +void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) { try { diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h index d8109a50bb6..c969ec55af7 100644 --- a/programs/odbc-bridge/PingHandler.h +++ b/programs/odbc-bridge/PingHandler.h @@ -1,17 +1,19 @@ #pragma once -#include + +#include namespace DB { -/** Simple ping handler, answers "Ok." to GET request - */ -class PingHandler : public Poco::Net::HTTPRequestHandler + +/// Simple ping handler, answers "Ok." 
to GET request +class PingHandler : public HTTPRequestHandler { public: - PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: size_t keep_alive_timeout; }; + } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index fa08a27da59..d4a70db61f4 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,12 +2,12 @@ #if USE_ODBC -# include +# include +# include # include # include # include # include -# include # include # include # include @@ -33,16 +33,16 @@ namespace } -void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -60,8 +60,16 @@ void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, bool result = isSchemaAllowed(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeBoolText(result, out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeBoolText(result, out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) { diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 76aa23b903c..91eddf67803 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -1,17 +1,18 @@ #pragma once +#include + #include -#include #if USE_ODBC namespace DB { + class Context; - -/// This handler establishes connection to database, and retrieve whether schema is allowed. -class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler +/// This handler establishes connection to database, and retrieves whether schema is allowed. 
+class SchemaAllowedHandler : public HTTPRequestHandler { public: SchemaAllowedHandler(size_t keep_alive_timeout_, Context &) @@ -19,7 +20,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2bb5181d348..f501e182cb7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -59,7 +59,6 @@ #include #include #include -#include #include "MetricsTransmitter.h" #include #include @@ -70,6 +69,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -94,12 +94,16 @@ # include #endif +#if USE_NURAFT +# include +#endif namespace CurrentMetrics { extern const Metric Revision; extern const Metric VersionInteger; extern const Metric MemoryTracking; + extern const Metric MaxDDLEntryID; } @@ -842,23 +846,33 @@ int Server::main(const std::vector & /*args*/) listen_try = true; } - for (const auto & listen_host : listen_hosts) + if (config().has("test_keeper_server")) { - /// TCP TestKeeper - const char * port_name = "test_keeper_server.tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) +#if USE_NURAFT + /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. + global_context->initializeNuKeeperStorageDispatcher(); + for (const auto & listen_host : listen_hosts) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables->emplace_back( - port_name, - std::make_unique( - new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + /// TCP NuKeeper + const char * port_name = "test_keeper_server.tcp_port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers_to_start_before_tables->emplace_back( + port_name, + std::make_unique( + new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString()); + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif - LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString()); - }); } for (auto & server : *servers_to_start_before_tables) @@ -898,6 +912,8 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); else LOG_INFO(log, "Closed connections to servers for tables."); + + global_context->shutdownNuKeeperStorageDispatcher(); } /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. 
@@ -997,7 +1013,8 @@ int Server::main(const std::vector & /*args*/) int pool_size = config().getInt("distributed_ddl.pool_size", 1); if (pool_size < 1) throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), "distributed_ddl")); + global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), + "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID)); } std::unique_ptr dns_cache_updater; @@ -1056,8 +1073,10 @@ int Server::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for http://{}", address.toString()); }); @@ -1071,8 +1090,10 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for https://{}", address.toString()); #else @@ -1146,8 +1167,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString()); }); @@ -1160,8 +1187,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString()); #else @@ -1221,8 +1254,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - 
servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString()); }); diff --git a/programs/server/Server.h b/programs/server/Server.h index c582e475308..fbfc26f6ee5 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -51,6 +51,7 @@ public: } void defineOptions(Poco::Util::OptionSet & _options) override; + protected: int run() override; @@ -65,8 +66,6 @@ protected: private: Context * global_context_ptr = nullptr; -private: - Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; using CreateServerFunc = std::function; diff --git a/programs/server/config.xml b/programs/server/config.xml index 849d3dc32ba..ba9b8b04b05 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -284,6 +284,10 @@ In bytes. Cache is single for server. Memory is allocated only on demand. Cache is used when 'use_uncompressed_cache' user setting turned on (off by default). Uncompressed cache is advantageous only for very short queries and in rare cases. + + Note: uncompressed cache can be pointless for lz4, because memory bandwidth + is slower than multi-core decompression on some server configurations. + Enabling it can sometimes paradoxically make queries slower. --> 8589934592 @@ -421,9 +425,15 @@ - + + + + default diff --git a/programs/server/users.xml b/programs/server/users.xml index 3223d855651..ef66891a6a0 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -7,9 +7,6 @@ 10000000000 - - 0 - + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml index 227d87ca92a..ed7f66b1b41 100644 --- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml @@ -11,7 +11,8 @@ elements
- 5 + + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml index 8eff3a6407b..d2d7dff61ad 100644 --- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml @@ -12,7 +12,8 @@ SELECT intDiv(count(), 5) from dict.dep_y - 5 + + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index d5453bb4814..b8ebcc6cc4b 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -65,7 +65,7 @@ def test_get_data(started_cluster): assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" - # dep_x and dep_z are updated only when there `intDiv(count(), 4)` is changed. + # dep_x and dep_z are updated only when there `intDiv(count(), 5)` is changed. query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)") assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time=2, retry_count=10) assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n" diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py index 811eb94bad4..24f11fec547 100644 --- a/tests/integration/test_distributed_ddl/cluster.py +++ b/tests/integration/test_distributed_ddl/cluster.py @@ -10,8 +10,8 @@ from helpers.test_tools import TSV class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): - def __init__(self, base_path, config_dir): - ClickHouseCluster.__init__(self, base_path) + def __init__(self, base_path, config_dir, testcase_name): + ClickHouseCluster.__init__(self, base_path, name=testcase_name) self.test_config_dir = config_dir @@ -104,8 +104,8 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): def ddl_check_there_are_no_dublicates(instance): query = "SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/* ddl_entry=query-%' GROUP BY query)" rows = instance.query(query) - assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name, - instance.ip_address, query) + assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}: {}".format(instance.name, + instance.ip_address, rows) @staticmethod def insert_reliable(instance, query_insert): diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index f0e78dfec41..58e1d0d06f7 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ b/tests/integration/test_distributed_ddl/test.py @@ -14,7 +14,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, request.param) try: cluster.prepare() diff --git a/tests/integration/test_distributed_ddl/test_replicated_alter.py b/tests/integration/test_distributed_ddl/test_replicated_alter.py index bd95f5660b7..148ad5fca5e 100644 --- a/tests/integration/test_distributed_ddl/test_replicated_alter.py +++ 
b/tests/integration/test_distributed_ddl/test_replicated_alter.py @@ -12,7 +12,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, "alters_" + request.param) try: # TODO: Fix ON CLUSTER alters when nodes have different configs. Need to canonicalize node identity. diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference b/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py similarity index 100% rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference rename to tests/integration/test_insert_distributed_async_extra_dirs/__init__.py diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml new file mode 100644 index 00000000000..1df72377ce6 --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml @@ -0,0 +1,13 @@ + + + + + + node + 9000 + + + + + + diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/test.py b/tests/integration/test_insert_distributed_async_extra_dirs/test.py new file mode 100644 index 00000000000..8365fce298d --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/test.py @@ -0,0 +1,43 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/remote_servers.xml'], stay_alive=True) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_insert_distributed_async_send_success(): + node.query('CREATE TABLE data (key Int, value String) Engine=Null()') + node.query(""" + CREATE TABLE dist AS data + Engine=Distributed( + test_cluster, + currentDatabase(), + data, + key + ) + """) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard1_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard1_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica1']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica1/1.bin']) + + # will check that clickhouse-server is alive + node.restart_clickhouse() diff --git a/tests/integration/test_insert_distributed_async_send/test.py b/tests/integration/test_insert_distributed_async_send/test.py index 7f6a2887c3b..b469da4e2e1 100644 --- a/tests/integration/test_insert_distributed_async_send/test.py +++ b/tests/integration/test_insert_distributed_async_send/test.py @@ -175,38 +175,43 @@ def test_insert_distributed_async_send_different_header(batch): create_tables('insert_distributed_async_send_cluster_two_shards') node = get_node(batch) - node.query("INSERT INTO dist VALUES (0, 
'')", settings={ + node.query("INSERT INTO dist VALUES (0, 'f')", settings={ 'prefer_localhost_replica': 0, }) - node.query('ALTER TABLE dist MODIFY COLUMN value Nullable(String)') - node.query("INSERT INTO dist VALUES (2, '')", settings={ + node.query('ALTER TABLE dist MODIFY COLUMN value UInt64') + node.query("INSERT INTO dist VALUES (2, 1)", settings={ 'prefer_localhost_replica': 0, }) + n1.query('ALTER TABLE data MODIFY COLUMN value UInt64', settings={ + 'mutations_sync': 1, + }) + if batch: - # first batch with Nullable(String) - n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={ - 'mutations_sync': 1, - }) - # but only one batch will be sent - with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: String to Nullable\(String\)\. Stack trace:"): + # but only one batch will be sent, and first is with UInt64 column, so + # one rows inserted, and for string ('f') exception will be throw. + with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"): node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 1 - # second batch with String - n1.query('ALTER TABLE data MODIFY COLUMN value String', settings={ - 'mutations_sync': 1, - }) + # but once underlying column String, implicit conversion will do the + # thing, and insert left batch. + n1.query(""" + DROP TABLE data SYNC; + CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key; + """) node.query('SYSTEM FLUSH DISTRIBUTED dist') - assert int(n1.query('SELECT count() FROM data')) == 2 - else: - # first send with String - with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: Nullable\(String\) to String\. Stack trace:"): - node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 1 - # second send with Nullable(String) - n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={ - 'mutations_sync': 1, - }) + else: + # first send with String ('f'), so zero rows will be inserted + with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"): + node.query('SYSTEM FLUSH DISTRIBUTED dist') + assert int(n1.query('SELECT count() FROM data')) == 0 + # but once underlying column String, implicit conversion will do the + # thing, and insert 2 rows (mixed UInt64 and String). 
+ n1.query(""" + DROP TABLE data SYNC; + CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key; + """) node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 2 diff --git a/tests/integration/test_limited_replicated_fetches/test.py b/tests/integration/test_limited_replicated_fetches/test.py index 2091c65857e..9b9b8befd67 100644 --- a/tests/integration/test_limited_replicated_fetches/test.py +++ b/tests/integration/test_limited_replicated_fetches/test.py @@ -69,3 +69,6 @@ def test_limited_fetches(started_cluster): assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches" assert(max(background_fetches_metric)) == 3, "Just checking metric consistent with table" + + node1.query("DROP TABLE IF EXISTS t SYNC") + node2.query("DROP TABLE IF EXISTS t SYNC") \ No newline at end of file diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index 8bb1fdb84e0..c9be2387fc7 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -10,6 +10,7 @@ import random import threading from multiprocessing.dummy import Pool +from helpers.test_tools import assert_eq_with_retry def check_query(clickhouse_node, query, result_set, retry_count=60, interval_seconds=3): lastest_result = '' @@ -79,9 +80,9 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam check_query(clickhouse_node, """ SELECT key, unsigned_tiny_int, tiny_int, unsigned_small_int, - small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer, + small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer, unsigned_bigint, _bigint, unsigned_float, _float, unsigned_double, _double, _varchar, _char, binary_col, - _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */ + _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */ _bool FROM test_database.test_table_1 ORDER BY key FORMAT TSV """, "1\t2\t-1\t2\t-2\t3\t-3\t4\t-4\t5\t-5\t6\t-6\t3.2\t-3.2\t3.4\t-3.4\tvarchar\tchar\tbinary\\0\\0\t2020-01-01\t" @@ -485,7 +486,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name): check_query(clickhouse_node, "SELECT count((_sign, _version)) FROM db.t FORMAT TSV", res[0]) assert clickhouse_node.query("SELECT count(_sign) FROM db.t FORMAT TSV") == res[1] - assert clickhouse_node.query("SELECT count(_version) FROM db.t FORMAT TSV") == res[2] + assert_eq_with_retry(clickhouse_node, "SELECT count(_version) FROM db.t", res[2].strip(), sleep_time=2, retry_count=3) assert clickhouse_node.query("SELECT count() FROM db.t FORMAT TSV") == "1\n" assert clickhouse_node.query("SELECT count(*) FROM db.t FORMAT TSV") == "1\n" @@ -720,7 +721,7 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name): t = threading.Thread(target=insert, args=(1000,)) t.start() - + # TODO: add clickhouse_node.restart_clickhouse(20, kill=False) test clickhouse_node.restart_clickhouse(20, kill=True) t.join() @@ -732,3 +733,50 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE kill_clickhouse_while_insert") clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert") + +def utf8mb4_test(clickhouse_node, 
mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test") + clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test") + mysql_node.query("CREATE DATABASE utf8mb4_test") + mysql_node.query("CREATE TABLE utf8mb4_test.test (id INT(11) NOT NULL PRIMARY KEY, name VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4") + mysql_node.query("INSERT INTO utf8mb4_test.test VALUES(1, '🦄'),(2, '\u2601')") + clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n") + +def system_parts_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") + clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test") + mysql_node.query("CREATE DATABASE system_parts_test") + mysql_node.query("CREATE TABLE system_parts_test.test ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;") + mysql_node.query("INSERT INTO system_parts_test.test VALUES(1),(2),(3)") + def check_active_parts(num): + check_query(clickhouse_node, "SELECT count() FROM system.parts WHERE database = 'system_parts_test' AND table = 'test' AND active = 1", "{}\n".format(num)) + clickhouse_node.query("CREATE DATABASE system_parts_test ENGINE = MaterializeMySQL('{}:3306', 'system_parts_test', 'root', 'clickhouse')".format(service_name)) + check_active_parts(1) + mysql_node.query("INSERT INTO system_parts_test.test VALUES(4),(5),(6)") + check_active_parts(2) + clickhouse_node.query("OPTIMIZE TABLE system_parts_test.test") + check_active_parts(1) + +def multi_table_update_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS multi_table_update") + clickhouse_node.query("DROP DATABASE IF EXISTS multi_table_update") + mysql_node.query("CREATE DATABASE multi_table_update") + mysql_node.query("CREATE TABLE multi_table_update.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("CREATE TABLE multi_table_update.b (id INT(11) NOT NULL PRIMARY KEY, othervalue VARCHAR(255))") + mysql_node.query("INSERT INTO multi_table_update.a VALUES(1, 'foo')") + mysql_node.query("INSERT INTO multi_table_update.b VALUES(1, 'bar')") + clickhouse_node.query("CREATE DATABASE multi_table_update ENGINE = MaterializeMySQL('{}:3306', 'multi_table_update', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SHOW TABLES FROM multi_table_update", "a\nb\n") + mysql_node.query("UPDATE multi_table_update.a, multi_table_update.b SET value='baz', othervalue='quux' where a.id=b.id") + + check_query(clickhouse_node, "SELECT * FROM multi_table_update.a", "1\tbaz\n") + check_query(clickhouse_node, "SELECT * FROM multi_table_update.b", "1\tquux\n") + +def system_tables_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS system_tables_test") + clickhouse_node.query("DROP DATABASE IF EXISTS system_tables_test") + mysql_node.query("CREATE DATABASE system_tables_test") + mysql_node.query("CREATE TABLE system_tables_test.test (id int NOT NULL PRIMARY KEY) ENGINE=InnoDB") + clickhouse_node.query("CREATE DATABASE system_tables_test ENGINE = MaterializeMySQL('{}:3306', 'system_tables_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT partition_key, sorting_key, primary_key FROM system.tables WHERE database = 'system_tables_test' AND 
name = 'test'", "intDiv(id, 4294967)\tid\tid\n") diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py index dbd6e894987..ced9a978d02 100644 --- a/tests/integration/test_materialize_mysql_database/test.py +++ b/tests/integration/test_materialize_mysql_database/test.py @@ -37,6 +37,12 @@ class MySQLNodeInstance: self.docker_compose = docker_compose self.project_name = project_name + self.base_dir = p.dirname(__file__) + self.instances_dir = p.join(self.base_dir, '_instances_mysql') + if not os.path.exists(self.instances_dir): + os.mkdir(self.instances_dir) + self.docker_logs_path = p.join(self.instances_dir, 'docker_mysql.log') + def alloc_connection(self): if self.mysql_connection is None: @@ -71,10 +77,28 @@ class MySQLNodeInstance: cursor.execute(executio_query) return cursor.fetchall() + def start_and_wait(self): + run_and_check(['docker-compose', + '-p', cluster.project_name, + '-f', self.docker_compose, + 'up', '--no-recreate', '-d', + ]) + self.wait_mysql_to_start(120) + def close(self): if self.mysql_connection is not None: self.mysql_connection.close() + with open(self.docker_logs_path, "w+") as f: + try: + run_and_check([ + 'docker-compose', + '-p', cluster.project_name, + '-f', self.docker_compose, 'logs', + ], stdout=f) + except Exception as e: + print("Unable to get logs from docker mysql.") + def wait_mysql_to_start(self, timeout=60): start = time.time() while time.time() - start < timeout: @@ -95,9 +119,7 @@ def started_mysql_5_7(): mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 3308, docker_compose) try: - run_and_check( - ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d']) - mysql_node.wait_mysql_to_start(120) + mysql_node.start_and_wait() yield mysql_node finally: mysql_node.close() @@ -111,9 +133,7 @@ def started_mysql_8_0(): mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 33308, docker_compose) try: - run_and_check( - ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d']) - mysql_node.wait_mysql_to_start(120) + mysql_node.start_and_wait() yield mysql_node finally: mysql_node.close() @@ -228,3 +248,21 @@ def test_clickhouse_killed_while_insert_5_7(started_cluster, started_mysql_5_7, @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic]) def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0, clickhouse_node): materialize_with_ddl.clickhouse_killed_while_insert(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1") + materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): + materialize_with_ddl.system_parts_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_multi_table_update(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_5_7, "mysql1") + 
materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_system_tables_table(started_cluster, started_mysql_8_0, clickhouse_node): + materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_8_0, "mysql8_0") diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml new file mode 100644 index 00000000000..a756c4434ea --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml @@ -0,0 +1,5 @@ + + 0.5 + 0.5 + 0.5 + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..9361a21efca --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
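The storage configuration above declares an S3 disk backed by the MinIO endpoint plus a local hdd disk, and the restore tests added later in this diff drive recovery through a plain-text marker file on that disk. The sketch below only illustrates the format that the test helper create_restore_file writes with echo (an optional revision, then bucket and path, one value per line); the /tmp path is made up for the example and revision 0 is simply what the full-restore test passes.

import os

# Minimal sketch of the marker file create_restore_file() builds inside the container
# (the real test appends to /var/lib/clickhouse/disks/s3/restore with `echo -en`).
def write_restore_marker(disk_root, revision=0, bucket=None, path=None):
    os.makedirs(disk_root, exist_ok=True)
    with open(os.path.join(disk_root, 'restore'), 'w') as f:
        f.write('{}\n'.format(revision))      # revision 0 in the full-restore case
        if bucket:
            f.write('{}\n'.format(bucket))    # source bucket, e.g. "root" or "root2"
        if path:
            f.write('{}\n'.format(path))      # path prefix inside the bucket, e.g. "data"

write_restore_marker('/tmp/disks-s3-example', revision=0, bucket='root', path='data')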
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml new file mode 100644 index 00000000000..645d1111ab8 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root2/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
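storage_conf_another_bucket.xml is the same disk layout pointed at the root2 bucket, so node_another_bucket can be restored from data originally written by node. The restore assertions in the new test expect count() == 4 * 4096 and sum(id) == 0 because generate_values() is always called in sign=1 / sign=-1 pairs; that invariant is easy to check standalone with the same id scheme.

# generate_values(date, count, sign) in the new test produces ids equal to sign * (i + 1),
# so inserting a batch with sign=1 and a batch with sign=-1 cancels out in sum(id).
def ids(count, sign=1):
    return [sign * (i + 1) for i in range(count)]

batches = [ids(4096), ids(4096, -1), ids(4096), ids(4096, -1)]
all_ids = [value for batch in batches for value in batch]

assert len(all_ids) == 4096 * 4   # matches the expected count(*)
assert sum(all_ids) == 0          # matches the expected sum(id)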
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml new file mode 100644 index 00000000000..42207674c79 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root2/another_data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
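The third variant, storage_conf_another_bucket_path.xml, additionally restores from a different key prefix (another_data) in the second bucket. The revision-based tests below obtain a counter with get_revision_counter(), which just reads shadow/<backup_number>/revision.txt after ALTER TABLE ... FREEZE, and the "middle of mutation" case picks the midpoint between two snapshots; with made-up counters that choice is plain integer arithmetic.

# Hypothetical revision counters; the real values come from
# /var/lib/clickhouse/disks/s3/shadow/<backup_number>/revision.txt via get_revision_counter().
revision_before_mutation = 10
revision_after_mutation = 16

# test_restore_mutations restores to the midpoint to exercise recovery of a
# half-applied mutation, which should then be finished after table startup.
revision = (revision_before_mutation + revision_after_mutation) // 2
assert revision_before_mutation <= revision <= revision_after_mutation
print(revision)  # 13 for these example counters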
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml new file mode 100644 index 00000000000..797113053f4 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.xml new file mode 100644 index 00000000000..24b7344df3a --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.xml @@ -0,0 +1,20 @@ + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py new file mode 100644 index 00000000000..346d9aced3f --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -0,0 +1,313 @@ +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node", main_configs=[ + "configs/config.d/storage_conf.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], with_minio=True, stay_alive=True) + cluster.add_instance("node_another_bucket", main_configs=[ + "configs/config.d/storage_conf_another_bucket.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True) + cluster.add_instance("node_another_bucket_path", main_configs=[ + "configs/config.d/storage_conf_another_bucket_path.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def random_string(length): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(length)) + + +def generate_values(date_str, count, sign=1): + data = [[date_str, sign * (i + 1), random_string(10)] for i in range(count)] + data.sort(key=lambda tup: tup[1]) + return ",".join(["('{}',{},'{}',{})".format(x, y, z, 0) for x, y, z in data]) + + +def create_table(node, table_name, additional_settings=None): + node.query("CREATE DATABASE IF NOT EXISTS s3 ENGINE = Ordinary") + + create_table_statement = """ + CREATE TABLE s3.{} ( + dt Date, + id Int64, + data String, + counter Int64, + INDEX min_max (id) TYPE minmax GRANULARITY 3 + ) ENGINE=MergeTree() + PARTITION BY dt + ORDER BY (dt, id) + SETTINGS + storage_policy='s3', + old_parts_lifetime=600, + index_granularity=512 + """.format(table_name) + + if additional_settings: + create_table_statement += "," + create_table_statement += additional_settings + + node.query(create_table_statement) + + +def purge_s3(cluster, bucket): + minio = cluster.minio_client + for obj in list(minio.list_objects(bucket, recursive=True)): + minio.remove_object(bucket, obj.object_name) + + +def drop_s3_metadata(node): + node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/disks/s3/*'], user='root') + + +def drop_shadow_information(node): + node.exec_in_container(['bash', '-c', 
'rm -rf /var/lib/clickhouse/shadow/*'], user='root') + + +def create_restore_file(node, revision=0, bucket=None, path=None): + add_restore_option = 'echo -en "{}\n" >> /var/lib/clickhouse/disks/s3/restore' + node.exec_in_container(['bash', '-c', add_restore_option.format(revision)], user='root') + if bucket: + node.exec_in_container(['bash', '-c', add_restore_option.format(bucket)], user='root') + if path: + node.exec_in_container(['bash', '-c', add_restore_option.format(path)], user='root') + + +def get_revision_counter(node, backup_number): + return int(node.exec_in_container(['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')) + + +@pytest.fixture(autouse=True) +def drop_table(cluster): + yield + + node_names = ["node", "node_another_bucket", "node_another_bucket_path"] + + for node_name in node_names: + node = cluster.instances[node_name] + node.query("DROP TABLE IF EXISTS s3.test NO DELAY") + + drop_s3_metadata(node) + drop_shadow_information(node) + + buckets = [cluster.minio_bucket, cluster.minio_bucket_2] + for bucket in buckets: + purge_s3(cluster, bucket) + + +def test_full_restore(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node.stop_clickhouse() + drop_s3_metadata(node) + node.start_clickhouse() + + # All data is removed. 
+ assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(0) + + node.stop_clickhouse() + create_restore_file(node) + node.start_clickhouse(10) + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + +def test_restore_another_bucket_path(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + node_another_bucket.stop_clickhouse() + create_restore_file(node_another_bucket, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node_another_bucket_path = cluster.instances["node_another_bucket_path"] + + create_table(node_another_bucket_path, "test") + + node_another_bucket_path.stop_clickhouse() + create_restore_file(node_another_bucket_path, bucket="root2", path="data") + node_another_bucket_path.start_clickhouse(10) + + assert node_another_bucket_path.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket_path.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + +def test_restore_different_revisions(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision1 = get_revision_counter(node, 1) + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision2 = get_revision_counter(node, 2) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + node.query("ALTER TABLE s3.test FREEZE") + revision3 = get_revision_counter(node, 3) + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node.query("SELECT count(*) from system.parts where table = 'test'") == '5\n' + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + # Restore to revision 1 (2 parts). 
+ node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision1, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '2\n' + + # Restore to revision 2 (4 parts). + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision2, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '4\n' + + # Restore to revision 3 (4 parts + 1 merged). + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision3, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '5\n' + + +def test_restore_mutations(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision_before_mutation = get_revision_counter(node, 1) + + node.query("ALTER TABLE s3.test UPDATE counter = 1 WHERE 1", settings={"mutations_sync": 2}) + + node.query("ALTER TABLE s3.test FREEZE") + revision_after_mutation = get_revision_counter(node, 2) + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + # Restore to revision before mutation. + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision_before_mutation, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(0) + + # Restore to revision after mutation. 
+ node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision_after_mutation, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096) + + # Restore to revision in the middle of mutation. + # Unfinished mutation should be completed after table startup. + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + revision = (revision_before_mutation + revision_after_mutation) // 2 + create_restore_file(node_another_bucket, revision=revision, bucket="root") + node_another_bucket.start_clickhouse(10) + + # Wait for unfinished mutation completion. + time.sleep(3) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 9532d4b8ba2..7f7d59674bc 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -217,7 +217,7 @@ def test_mysql_replacement_query(mysql_client, server_address): --password=123 -e "select database();" '''.format(host=server_address, port=server_port), demux=True) assert code == 0 - assert stdout.decode() == 'database()\ndefault\n' + assert stdout.decode() == 'DATABASE()\ndefault\n' code, (stdout, stderr) = mysql_client.exec_run(''' mysql --protocol tcp -h {host} -P {port} default -u default diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 0ec89be9413..6bb6a6ee777 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -262,18 +262,20 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): assert_eq_with_retry(node1, "select dictGetUInt8('sqlite3_odbc_cached', 'Z', toUInt64(1))", "12") -def test_postgres_odbc_hached_dictionary_with_schema(started_cluster): +def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): conn = get_postgres_conn() cursor = conn.cursor() + cursor.execute("truncate table clickhouse.test_table") cursor.execute("insert into clickhouse.test_table values(1, 'hello'),(2, 'world')") node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", "hello") assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", "world") -def test_postgres_odbc_hached_dictionary_no_tty_pipe_overflow(started_cluster): +def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): conn = get_postgres_conn() cursor = 
conn.cursor() + cursor.execute("truncate table clickhouse.test_table") cursor.execute("insert into clickhouse.test_table values(3, 'xxx')") for i in range(100): try: @@ -340,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster): assert clickhouse_pid is None assert bridge_pid is None + + +def test_odbc_postgres_date_data_type(started_cluster): + conn = get_postgres_conn(); + cursor = conn.cursor() + cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)") + + cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')") + conn.commit() + + node1.query( + ''' + CREATE TABLE test_date (column1 UInt64, column2 Date) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''') + + expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n' + result = node1.query('SELECT * FROM test_date'); + assert(result == expected) + + diff --git a/tests/integration/test_query_deduplication/__init__.py b/tests/integration/test_query_deduplication/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_query_deduplication/configs/deduplication_settings.xml b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml new file mode 100644 index 00000000000..8369c916848 --- /dev/null +++ b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml @@ -0,0 +1,5 @@ + + + 1 + + diff --git a/tests/integration/test_query_deduplication/configs/remote_servers.xml b/tests/integration/test_query_deduplication/configs/remote_servers.xml new file mode 100644 index 00000000000..f12558ca529 --- /dev/null +++ b/tests/integration/test_query_deduplication/configs/remote_servers.xml @@ -0,0 +1,24 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_query_deduplication/test.py b/tests/integration/test_query_deduplication/test.py new file mode 100644 index 00000000000..8d935b98579 --- /dev/null +++ b/tests/integration/test_query_deduplication/test.py @@ -0,0 +1,165 @@ +import uuid + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +DUPLICATED_UUID = uuid.uuid4() + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + 'node1', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + +node2 = cluster.add_instance( + 'node2', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + +node3 = cluster.add_instance( + 'node3', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def prepare_node(node, parts_uuid=None): + node.query(""" + CREATE TABLE t(_prefix UInt8 DEFAULT 0, key UInt64, value UInt64) + ENGINE MergeTree() + ORDER BY tuple() + PARTITION BY _prefix + SETTINGS index_granularity = 1 + """) + + node.query(""" + CREATE TABLE d AS t ENGINE=Distributed(test_cluster, default, t) + """) + + # Stop merges while populating test data + node.query("SYSTEM STOP MERGES") + + # Create 5 parts + for i in range(1, 6): + node.query("INSERT INTO t VALUES ({}, {}, {})".format(i, i, i)) + + node.query("DETACH TABLE t") + + if parts_uuid: + for 
part, part_uuid in parts_uuid: + script = """ + echo -n '{}' > /var/lib/clickhouse/data/default/t/{}/uuid.txt + """.format(part_uuid, part) + node.exec_in_container(["bash", "-c", script]) + + # Attach table back + node.query("ATTACH TABLE t") + + # NOTE: + # due to absence of the ability to lock part, need to operate on parts while preventing merges + # node.query("SYSTEM START MERGES") + # node.query("OPTIMIZE TABLE t FINAL") + + print(node.name) + print(node.query("SELECT name, uuid, partition FROM system.parts WHERE table = 't' AND active ORDER BY name")) + + assert '5' == node.query("SELECT count() FROM system.parts WHERE table = 't' AND active").strip() + if parts_uuid: + for part, part_uuid in parts_uuid: + assert '1' == node.query( + "SELECT count() FROM system.parts WHERE table = 't' AND uuid = '{}' AND active".format( + part_uuid)).strip() + + +@pytest.fixture(scope="module") +def prepared_cluster(started_cluster): + print("duplicated UUID: {}".format(DUPLICATED_UUID)) + prepare_node(node1, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)]) + prepare_node(node2, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)]) + prepare_node(node3) + + +def test_virtual_column(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result; + # the select query uses the virtual column _part_uuid to filter out the part in one shard + expected = """ + 1 2 + 2 2 + 3 1 + 4 2 + 5 2 + """ + assert TSV(expected) == TSV(node1.query(""" + SELECT + key, + count() AS c + FROM d + WHERE ((_shard_num = 1) AND (_part_uuid != '{}')) OR (_shard_num = 2) + GROUP BY key + ORDER BY + key ASC + """.format(DUPLICATED_UUID))) + + +def test_with_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result + expected = """ +1 3 +2 3 +3 2 +4 3 +5 3 +""" + assert TSV(expected) == TSV(node1.query( + "SET allow_experimental_query_deduplication=1; SELECT key, count() c FROM d GROUP BY key ORDER BY key")) + + +def test_no_merge_with_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result.
+ # even with distributed_group_by_no_merge=1 the duplicated part should be excluded from the final result + expected = """ +1 1 +2 1 +3 1 +4 1 +5 1 +1 1 +2 1 +3 1 +4 1 +5 1 +1 1 +2 1 +4 1 +5 1 +""" + assert TSV(expected) == TSV(node1.query("SELECT key, count() c FROM d GROUP BY key ORDER BY key", settings={ + "allow_experimental_query_deduplication": 1, + "distributed_group_by_no_merge": 1, + })) + + +def test_without_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # but allow_experimental_query_deduplication is disabled, + # so it will not be excluded + expected = """ +1 3 +2 3 +3 3 +4 3 +5 3 +""" + assert TSV(expected) == TSV(node1.query( + "SET allow_experimental_query_deduplication=0; SELECT key, count() c FROM d GROUP BY key ORDER BY key")) diff --git a/tests/integration/test_quota/configs/users.d/assign_myquota.xml b/tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/assign_myquota.xml rename to tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml diff --git a/tests/integration/test_quota/configs/users.d/quota.xml b/tests/integration/test_quota/configs/users.d/myquota.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/quota.xml rename to tests/integration/test_quota/configs/users.d/myquota.xml diff --git a/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml new file mode 100644 index 00000000000..70f51cfff43 --- /dev/null +++ b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml @@ -0,0 +1,10 @@ + + + + + + ::/0 + + + + diff --git a/tests/integration/test_quota/normal_limits.xml b/tests/integration/test_quota/normal_limits.xml index b7c3a67b5cc..e32043ef5ec 100644 --- a/tests/integration/test_quota/normal_limits.xml +++ b/tests/integration/test_quota/normal_limits.xml @@ -8,6 +8,8 @@ 1000 + 500 + 500 0 1000 0 diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 0614150ee07..353d776c0f3 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -7,9 +7,10 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota.xml", +instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota_to_default_user.xml", "configs/users.d/drop_default_quota.xml", - "configs/users.d/quota.xml"]) + "configs/users.d/myquota.xml", + "configs/users.d/user_with_no_quota.xml"]) def check_system_quotas(canonical): @@ -28,7 +29,7 @@ def system_quota_limits(canonical): def system_quota_usage(canonical): canonical_tsv = TSV(canonical) - query = "SELECT quota_name, quota_key, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows," \ + query = "SELECT quota_name, quota_key, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows," \ "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \ "FROM system.quota_usage ORDER BY duration" r = TSV(instance.query(query)) @@ -38,7 +39,7 @@ def system_quota_usage(canonical): def system_quotas_usage(canonical): 
canonical_tsv = TSV(canonical) - query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows, " \ + query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows, " \ "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \ "FROM system.quotas_usage ORDER BY quota_name, quota_key, duration" r = TSV(instance.query(query)) @@ -49,9 +50,11 @@ def system_quotas_usage(canonical): def copy_quota_xml(local_file_name, reload_immediately=True): script_dir = os.path.dirname(os.path.realpath(__file__)) instance.copy_file_to_container(os.path.join(script_dir, local_file_name), - '/etc/clickhouse-server/users.d/quota.xml') + '/etc/clickhouse-server/users.d/myquota.xml') if reload_immediately: - instance.query("SYSTEM RELOAD CONFIG") + # We use the special user 'user_with_no_quota' here because + # we don't want SYSTEM RELOAD CONFIG to mess our quota consuming checks. + instance.query("SYSTEM RELOAD CONFIG", user='user_with_no_quota') @pytest.fixture(scope="module", autouse=True) @@ -71,28 +74,29 @@ def started_cluster(): @pytest.fixture(autouse=True) def reset_quotas_and_usage_info(): try: - yield - finally: instance.query("DROP QUOTA IF EXISTS qA, qB") copy_quota_xml('simpliest.xml') # To reset usage info. copy_quota_xml('normal_limits.xml') + yield + finally: + pass def test_quota_from_users_xml(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) instance.query("SELECT COUNT() from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) def test_simpliest_quota(): @@ -102,11 +106,11 @@ def test_simpliest_quota(): "['default']", "[]"]]) system_quota_limits("") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", 
"\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) def test_tracking_quota(): @@ -114,16 +118,16 @@ def test_tracking_quota(): copy_quota_xml('tracking.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) instance.query("SELECT COUNT() from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) def test_exceed_quota(): @@ -131,55 +135,55 @@ def test_exceed_quota(): copy_quota_xml('tiny_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]]) assert re.search("Quota.*has\ been\ exceeded", instance.query_and_get_error("SELECT * from test_table")) - system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]]) # Change quota, now the limits are enough to execute queries. 
copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]]) def test_add_remove_interval(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Add interval. copy_quota_xml('two_intervals.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952,63113904]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], - ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"], - ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], + ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"], + ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"], - ["myQuota", "default", 63113904, 1, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]]) + [["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"], + ["myQuota", "default", 63113904, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]]) # Remove interval. 
copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]]) # Remove all intervals. copy_quota_xml('simpliest.xml') @@ -187,26 +191,26 @@ def test_add_remove_interval(): "['default']", "[]"]]) system_quota_limits("") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) # Add one interval back. copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) def test_add_remove_quota(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Add quota. 
copy_quota_xml('two_quotas.xml') @@ -214,19 +218,19 @@ def test_add_remove_quota(): 0, "['default']", "[]"], ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']", "[3600,2629746]", 0, "[]", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], - ["myQuota2", 3600, 1, "\\N", "\\N", 4000, 400000, 4000, 400000, 60], - ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], + ["myQuota2", 3600, 1, "\\N", "\\N", "\\N", "\\N", 4000, 400000, 4000, 400000, 60], + ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Drop quota. copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Drop all quotas. copy_quota_xml('no_quotas.xml') @@ -238,15 +242,15 @@ def test_add_remove_quota(): copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) def test_reload_users_xml_by_timer(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) time.sleep(1) # The modification time of the 'quota.xml' file should be different, # because config files are reload by timer only when the modification time is changed. 
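The quota expectations above gain two new counter pairs, query_selects/max_query_selects and query_inserts/max_query_inserts, inserted between the queries and errors columns of system.quota_usage and system.quotas_usage, with the 500 limits coming from normal_limits.xml. The literal row lists are easy to get out of sync; below is a hypothetical helper, not part of this change, that builds an expected row from named overrides, with defaults matching the normal_limits.xml case.

# Column order mirrors the SELECT used by system_quota_usage() after this change:
# queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts,
# errors, max_errors, result_rows, max_result_rows, result_bytes, max_result_bytes,
# read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time.
USAGE_COLUMNS = [
    ('queries', 0), ('max_queries', 1000),
    ('query_selects', 0), ('max_query_selects', 500),
    ('query_inserts', 0), ('max_query_inserts', 500),
    ('errors', 0), ('max_errors', '\\N'),
    ('result_rows', 0), ('max_result_rows', '\\N'),
    ('result_bytes', 0), ('max_result_bytes', '\\N'),
    ('read_rows', 0), ('max_read_rows', 1000),
    ('read_bytes', 0), ('max_read_bytes', '\\N'),
    ('max_execution_time', '\\N'),
]

def usage_row(quota='myQuota', key='default', duration=31556952, **overrides):
    # Build one expected system.quota_usage row, overriding only the counters that changed.
    values = [overrides.get(name, default) for name, default in USAGE_COLUMNS]
    return [quota, key, duration] + values

# For example, the expected row after a single SELECT over 50 rows / 200 bytes:
print(usage_row(queries=1, query_selects=1, result_rows=50, result_bytes=200,
                read_rows=50, read_bytes=200))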
@@ -255,25 +259,25 @@ def test_reload_users_xml_by_timer(): ["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", ['user_name'], "[31556952]", 0, "['default']", "[]"]]) assert_eq_with_retry(instance, "SELECT * FROM system.quota_limits", - [["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" assert instance.query( - "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert instance.query( - "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) - expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert expected_access in instance.query("SHOW ACCESS") # Add interval. @@ -282,8 +286,8 @@ def test_dcl_introspection(): assert instance.query( "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000, FOR RANDOMIZED INTERVAL 2 year MAX result_bytes = 30000, read_bytes = 20000, execution_time = 120 TO default\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n" - "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n" + "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\t0\\t\\\\N\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120", instance.query("SHOW QUOTA")) # Drop interval, add quota. 
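The SHOW QUOTA checks in this test match whole tab-separated rows with long hand-escaped regular expressions, which is where the two extra columns have to be threaded through in each pattern. A hypothetical convenience, not used by the PR itself, assembles such a pattern from per-column fragments and checks it against a made-up SHOW QUOTA line (the timestamps are invented for the example).

import re

def quota_row_pattern(*columns):
    # Join per-column regex fragments with a literal tab, the way SHOW QUOTA prints rows.
    return '\\t'.join(str(column) for column in columns) + '\n'

# Roughly one of the expected rows above: wildcards for the timestamps, '\N' for NULL counters.
pattern = quota_row_pattern('myQuota', 'default', '.*', 31556952, 1, 1000, 1, 500, 0, 500,
                            0, '\\\\N', 50, '\\\\N', 200, '\\\\N', 50, 1000, 200, '\\\\N',
                            '.*', '\\\\N')
sample = '\t'.join(['myQuota', 'default', '2021-02-07 00:00:00', '31556952', '1', '1000',
                    '1', '500', '0', '500', '0', '\\N', '50', '\\N', '200', '\\N',
                    '50', '1000', '200', '\\N', '2021-02-07 00:00:00', '\\N']) + '\n'
assert re.match(pattern, sample)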
@@ -297,7 +301,7 @@ def test_dcl_introspection(): "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" \ "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) # Drop all quotas. @@ -315,12 +319,12 @@ def test_dcl_management(): assert instance.query( "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n" assert re.match( - "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t39446190\\t1\\t123\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query( @@ -328,37 +332,37 @@ def test_dcl_management(): assert instance.query( "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n" assert re.match( - "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t1\\t321\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" + "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t2\\t321\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n" + "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query( "ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS") assert re.match( - "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - 
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("ALTER QUOTA qA RENAME TO qB") assert instance.query( "SHOW CREATE QUOTA qB") == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n" assert re.match( - "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("DROP QUOTA qB") @@ -367,3 +371,22 @@ def test_dcl_management(): def test_users_xml_is_readonly(): assert re.search("storage is readonly", instance.query_and_get_error("DROP QUOTA myQuota")) + +def test_query_inserts(): + check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], + 0, "['default']", "[]"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quotas_usage( + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + + instance.query("INSERT INTO test_table values(1)") + system_quota_usage( + [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + +def test_consumption_show_tables_quota(): + instance.query("SHOW TABLES") + + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N\\t19\\t\\\\N\\t1\\t1000\\t35\\t\\\\N\\t.*\\t\\\\N\n", + instance.query("SHOW QUOTA")) diff --git a/tests/integration/test_quota/tiny_limits.xml b/tests/integration/test_quota/tiny_limits.xml index 3ab8858738a..4797c360ddd 100644 --- a/tests/integration/test_quota/tiny_limits.xml +++ b/tests/integration/test_quota/tiny_limits.xml @@ -8,6 +8,8 @@ 1 + 1 + 1 1 1 1 diff --git a/tests/integration/test_quota/tracking.xml b/tests/integration/test_quota/tracking.xml index 47e12bf8005..c5e7c993edc 100644 --- a/tests/integration/test_quota/tracking.xml +++ b/tests/integration/test_quota/tracking.xml @@ -8,6 +8,8 @@ 0 + 0 + 0 0 0 0 diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index f7df52f67e9..e62c7c9eaec 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,7 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleep(3)", timeout=1) + node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", 
settings={'max_concurrent_queries_for_user': 1}) assert node.contains_in_log('Too many simultaneous queries for user') diff --git a/tests/integration/test_replicated_database/__init__.py b/tests/integration/test_replicated_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml new file mode 100644 index 00000000000..ebceee3aa5c --- /dev/null +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -0,0 +1,34 @@ + + 10 + + + + + true + + main_node + 9000 + + + dummy_node + 9000 + + + competing_node + 9000 + + + + true + + snapshotting_node + 9000 + + + snapshot_recovering_node + 9000 + + + + + diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml new file mode 100644 index 00000000000..e0f7e8691e6 --- /dev/null +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -0,0 +1,12 @@ + + + + 1 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py new file mode 100644 index 00000000000..99e7d6077f8 --- /dev/null +++ b/tests/integration/test_replicated_database/test.py @@ -0,0 +1,278 @@ +import time +import re +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, assert_logs_contain +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) + +main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1}) +dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2}) +competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3}) +snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1}) +snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2}) + +all_nodes = [main_node, dummy_node, competing_node, snapshotting_node, snapshot_recovering_node] + +uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}") +def assert_create_query(nodes, table_name, expected): + replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x) + query = "show create table {}".format(table_name) + for node in nodes: + assert_eq_with_retry(node, query, expected, get_result=replace_uuid) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');") + dummy_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');") + yield cluster + + finally: + cluster.shutdown() + +def test_create_replicated_table(started_cluster): + assert "Old syntax is not allowed" in \ + main_node.query_and_get_error("CREATE 
TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/test/tmp', 'r', d, k, 8192);") + + main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);") + + expected = "CREATE TABLE testdb.replicated_table\\n(\\n `d` Date,\\n `k` UInt64,\\n `i32` Int32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\n" \ + "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" + assert_create_query([main_node, dummy_node], "testdb.replicated_table", expected) + # assert without replacing uuid + assert main_node.query("show create testdb.replicated_table") == dummy_node.query("show create testdb.replicated_table") + +@pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree']) +def test_simple_alter_table(started_cluster, engine): + # test_simple_alter_table + name = "testdb.alter_test_{}".format(engine) + main_node.query("CREATE TABLE {} " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = {} PARTITION BY StartDate ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);".format(name, engine)) + main_node.query("ALTER TABLE {} ADD COLUMN Added0 UInt32;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN Added2 UInt32;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN Added1 UInt32 AFTER Added0;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;".format(name)) + + full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')" + expected = "CREATE TABLE {}\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n" \ + " `ToDrop` UInt32,\\n `Added0` UInt32,\\n `Added1` UInt32,\\n `Added2` UInt32,\\n" \ + " `AddedNested1.A` Array(UInt32),\\n `AddedNested1.B` Array(UInt64),\\n `AddedNested1.C` Array(String),\\n" \ + " `AddedNested2.A` Array(UInt32),\\n `AddedNested2.B` Array(UInt64)\\n)\\n" \ + "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \ + "SETTINGS index_granularity = 8192".format(name, full_engine) + + assert_create_query([main_node, dummy_node], name, expected) + + # test_create_replica_after_delay + competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');") + + name = "testdb.alter_test_{}".format(engine) + main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) + main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) + main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name)) + + full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')" + expected = "CREATE TABLE {}\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n" \ + " `ToDrop` UInt32,\\n `Added0` UInt32,\\n 
`AddedNested1` UInt32,\\n `Added2` UInt32,\\n" \ + " `AddedNested2.A` Array(UInt32),\\n `AddedNested2.B` Array(UInt64),\\n `Added3` UInt32\\n)\\n" \ + "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \ + "SETTINGS index_granularity = 8192".format(name, full_engine) + + assert_create_query([main_node, dummy_node, competing_node], name, expected) + + +def test_alters_from_different_replicas(started_cluster): + # test_alters_from_different_replicas + competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');") + + main_node.query("CREATE TABLE testdb.concurrent_test " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);") + + main_node.query("CREATE TABLE testdb.dist AS testdb.concurrent_test ENGINE = Distributed(cluster, testdb, concurrent_test, CounterID)") + + dummy_node.stop_clickhouse(kill=True) + + settings = {"distributed_ddl_task_timeout": 10} + assert "There are 1 unfinished hosts (0 of them are currently active)" in \ + competing_node.query_and_get_error("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;", settings=settings) + dummy_node.start_clickhouse() + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;") + competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;") + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;") + competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;") + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32,\\n" \ + " `Added0` UInt32,\\n `Added1` UInt32,\\n `Added2` UInt32,\\n `AddedNested1.A` Array(UInt32),\\n" \ + " `AddedNested1.B` Array(UInt64),\\n `AddedNested1.C` Array(String),\\n `AddedNested2.A` Array(UInt32),\\n" \ + " `AddedNested2.B` Array(UInt64)\\n)\\n" \ + "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + + # test_create_replica_after_delay + main_node.query("DROP TABLE testdb.concurrent_test") + main_node.query("CREATE TABLE testdb.concurrent_test " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = ReplicatedMergeTree ORDER BY CounterID;") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + + main_node.query("INSERT INTO testdb.dist (CounterID, StartDate, UserID) SELECT 
number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)") + + # test_replica_restart + main_node.restart_clickhouse() + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + + # test_snapshot_and_snapshot_recover + snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');") + snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica2');") + assert_create_query(all_nodes, "testdb.concurrent_test", expected) + + main_node.query("SYSTEM FLUSH DISTRIBUTED testdb.dist") + main_node.query("ALTER TABLE testdb.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1") + res = main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2") + assert "shard1|replica1" in res and "shard1|replica2" in res and "shard1|replica3" in res + assert "shard2|replica1" in res and "shard2|replica2" in res + + expected = "1\t1\tmain_node\n" \ + "1\t2\tdummy_node\n" \ + "1\t3\tcompeting_node\n" \ + "2\t1\tsnapshotting_node\n" \ + "2\t2\tsnapshot_recovering_node\n" + assert main_node.query("SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='testdb'") == expected + + # test_drop_and_create_replica + main_node.query("DROP DATABASE testdb SYNC") + main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + assert_create_query(all_nodes, "testdb.concurrent_test", expected) + + for node in all_nodes: + node.query("SYSTEM SYNC REPLICA testdb.concurrent_test") + + expected = "0\t2021-02-02\t4249604106\n" \ + "1\t2021-02-03\t1343103100\n" \ + "4\t2021-02-06\t3902320246\n" \ + "7\t2021-02-09\t3844986530\n" \ + "9\t2021-02-11\t1241149650\n" + + assert_eq_with_retry(dummy_node, "SELECT CounterID, StartDate, UserID FROM testdb.dist ORDER BY CounterID", expected) + +def test_recover_staled_replica(started_cluster): + main_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');") + started_cluster.get_kazoo_client('zoo1').set('/clickhouse/databases/recover/logs_to_keep', b'10') + dummy_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica2');") + + settings = {"distributed_ddl_task_timeout": 0} + main_node.query("CREATE TABLE recover.t1 (n int) ENGINE=Memory", settings=settings) + dummy_node.query("CREATE TABLE recover.t2 (s String) ENGINE=Memory", settings=settings) + main_node.query("CREATE TABLE recover.mt1 (n int) ENGINE=MergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n", 
settings=settings) + main_node.query("CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + main_node.query("CREATE TABLE recover.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + main_node.query("CREATE DICTIONARY recover.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())") + dummy_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt2' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())") + + for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt3', 'rmt5']: + main_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + for table in ['t1', 't2', 'mt1', 'mt2']: + dummy_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + for table in ['rmt1', 'rmt2', 'rmt3', 'rmt5']: + main_node.query("SYSTEM SYNC REPLICA recover.{}".format(table)) + + with PartitionManager() as pm: + pm.drop_instance_zk_connections(dummy_node) + dummy_node.query_and_get_error("RENAME TABLE recover.t1 TO recover.m1") + main_node.query("RENAME TABLE recover.t1 TO recover.m1", settings=settings) + main_node.query("ALTER TABLE recover.mt1 ADD COLUMN m int", settings=settings) + main_node.query("ALTER TABLE recover.rmt1 ADD COLUMN m int", settings=settings) + main_node.query("RENAME TABLE recover.rmt3 TO recover.rmt4", settings=settings) + main_node.query("DROP TABLE recover.rmt5", settings=settings) + main_node.query("DROP DICTIONARY recover.d2", settings=settings) + main_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());", settings=settings) + + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + + assert main_node.query("SELECT name FROM system.tables WHERE database='recover' ORDER BY name") == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nrmt4\nt2\ntmp\n" + query = "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' ORDER BY name" + expected = main_node.query(query) + assert_eq_with_retry(dummy_node, query, expected) + + for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2']: + assert main_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + for table in ['t2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2', 'mt2']: + assert dummy_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + for table in ['m1', 'mt1']: + assert dummy_node.query("SELECT count() FROM recover.{}".format(table)) == "0\n" + + assert dummy_node.query("SELECT count() FROM system.tables WHERE 
database='recover_broken_tables'") == "2\n" + table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'mt1_26_%'").strip() + assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n" + table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'rmt5_26_%'").strip() + assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n" + + expected = "Cleaned 4 outdated objects: dropped 1 dictionaries and 1 tables, moved 2 tables" + assert_logs_contain(dummy_node, expected) + + dummy_node.query("DROP TABLE recover.tmp") + assert_eq_with_retry(main_node, "SELECT count() FROM system.tables WHERE database='recover' AND name='tmp'", "0\n") + +def test_startup_without_zk(started_cluster): + main_node.query("DROP DATABASE IF EXISTS testdb SYNC") + main_node.query("DROP DATABASE IF EXISTS recover SYNC") + with PartitionManager() as pm: + pm.drop_instance_zk_connections(main_node) + err = main_node.query_and_get_error("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');") + assert "ZooKeeper" in err + main_node.query("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');") + #main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=ReplicatedMergeTree order by n") + main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=MergeTree order by n") + main_node.query("INSERT INTO startup.rmt VALUES (42)") + with PartitionManager() as pm: + pm.drop_instance_zk_connections(main_node) + main_node.restart_clickhouse(stop_start_wait_sec=30) + assert main_node.query("SELECT (*,).1 FROM startup.rmt") == "42\n" + + for _ in range(10): + try: + main_node.query("CREATE TABLE startup.m (n int) ENGINE=Memory") + break + except: + time.sleep(1) + + main_node.query("EXCHANGE TABLES startup.rmt AND startup.m") + assert main_node.query("SELECT (*,).1 FROM startup.m") == "42\n" diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py index a3c35ca1537..65d49637b13 100644 --- a/tests/integration/test_send_crash_reports/test.py +++ b/tests/integration/test_send_crash_reports/test.py @@ -24,14 +24,17 @@ def started_node(): def test_send_segfault(started_node, ): + if started_node.is_built_with_thread_sanitizer(): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py") started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root") - time.sleep(0.5) + time.sleep(1) started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root") result = None for attempt in range(1, 6): - time.sleep(0.25 * attempt) + time.sleep(attempt) result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root') if result == 'OK': break diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 3ceef9f25cf..1945875bf53 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -46,7 +46,7 @@ def reset_after_test(): def test_smoke(): - # Set settings and constraints via CREATE SETTINGS PROFILE ... TO user + # Set settings and constraints via CREATE SETTINGS PROFILE ... 
TO user instance.query( "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin") assert instance.query( @@ -194,13 +194,13 @@ def test_show_profiles(): assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n" assert instance.query( - "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" + "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" assert instance.query( - "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \ + "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \ "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \ "CREATE SETTINGS PROFILE xyz\n" - expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \ + expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \ "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \ "CREATE SETTINGS PROFILE xyz\n" assert expected_access in instance.query("SHOW ACCESS") @@ -210,7 +210,7 @@ def test_allow_ddl(): assert "it's necessary to have grant" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin") assert "it's necessary to have grant" in instance.query_and_get_error("GRANT CREATE ON tbl TO robin", user="robin") assert "DDL queries are prohibited" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", settings={"allow_ddl": 0}) - + instance.query("GRANT CREATE ON tbl TO robin") instance.query("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin") instance.query("DROP TABLE tbl") diff --git a/tests/integration/test_storage_kafka/configs/kafka_macros.xml b/tests/integration/test_storage_kafka/configs/kafka_macros.xml deleted file mode 100644 index 7f6cfb5eb1f..00000000000 --- a/tests/integration/test_storage_kafka/configs/kafka_macros.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - kafka1 - old - old - - new - new - instance - JSONEachRow - - \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 1f31cbdbbc7..5f2726832cc 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -39,9 +39,16 @@ from . 
import social_pb2 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', - main_configs=['configs/kafka.xml', 'configs/log_conf.xml', 'configs/kafka_macros.xml'], + main_configs=['configs/kafka.xml', 'configs/log_conf.xml'], with_kafka=True, with_zookeeper=True, + macros={"kafka_broker":"kafka1", + "kafka_topic_old":"old", + "kafka_group_name_old":"old", + "kafka_topic_new":"new", + "kafka_group_name_new":"new", + "kafka_client_id":"instance", + "kafka_format_json_each_row":"JSONEachRow"}, clickhouse_path_dir='clickhouse_path') kafka_id = '' @@ -1732,6 +1739,11 @@ def test_kafka_produce_key_timestamp(kafka_cluster): @pytest.mark.timeout(600) def test_kafka_flush_by_time(kafka_cluster): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + topic_list = [] + topic_list.append(NewTopic(name="flush_by_time", num_partitions=1, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + instance.query(''' DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; @@ -1771,7 +1783,7 @@ def test_kafka_flush_by_time(kafka_cluster): time.sleep(18) - result = instance.query('SELECT uniqExact(ts) = 2, count() > 15 FROM test.view') + result = instance.query('SELECT uniqExact(ts) = 2, count() >= 15 FROM test.view') cancel.set() kafka_thread.join() @@ -2357,9 +2369,9 @@ def test_premature_flush_on_eof(kafka_cluster): ''') # messages created here will be consumed immedeately after MV creation - # reaching topic EOF. + # reaching topic EOF. # But we should not do flush immedeately after reaching EOF, because - # next poll can return more data, and we should respect kafka_flush_interval_ms + # next poll can return more data, and we should respect kafka_flush_interval_ms # and try to form bigger block messages = [json.dumps({'key': j + 1, 'value': j + 1}) for j in range(1)] kafka_produce('premature_flush_on_eof', messages) @@ -2379,11 +2391,11 @@ def test_premature_flush_on_eof(kafka_cluster): # all subscriptions/assignments done during select, so it start sending data to test.destination # immediately after creation of MV - + time.sleep(1.5) # that sleep is needed to ensure that first poll finished, and at least one 'empty' polls happened. # Empty poll before the fix were leading to premature flush. 
-    # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"
-
+    # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"
+
     # produce more messages after delay
     kafka_produce('premature_flush_on_eof', messages)
diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
index 38f098ae1e1..971491d4053 100755
--- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
+++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
@@ -34,11 +34,11 @@ cat >> /usr/local/hadoop/etc/hadoop/core-site.xml << EOF
         fs.defaultFS
-        hdfs://kerberizedhdfs1:9000
+        hdfs://kerberizedhdfs1:9010
         fs.default.name
-        hdfs://kerberizedhdfs1:9000
+        hdfs://kerberizedhdfs1:9010
+
+
+
+
+        SELECT * FROM
+          (SELECT EventTime,
+            count(*) OVER (ORDER BY EventTime ASC
+              RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
+          FROM hits_10m_single)
+        FORMAT Null
+
+
+
+
+        select * from
+          (select EventTime,
+            count(*) over (partition by
+              floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
+          from hits_10m_single)
+        format Null
+
+
+
+
+        select
+            min(number) over w,
+            count(*) over w,
+            max(number) over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+            from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+
+
+
+        select
+            first_value(number) over w,
+            dense_rank() over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+            from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+
+
diff --git a/tests/queries/0_stateless/00011_array_join_alias.sql b/tests/queries/0_stateless/00011_array_join_alias.sql
index 228038c1509..5eafeddb8fe 100644
--- a/tests/queries/0_stateless/00011_array_join_alias.sql
+++ b/tests/queries/0_stateless/00011_array_join_alias.sql
@@ -1 +1,2 @@
-SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN; -- { serverError 42 }
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a;
diff --git a/tests/queries/0_stateless/00302_http_compression.sh b/tests/queries/0_stateless/00302_http_compression.sh
index 829475e8602..cfa9a930f09 100755
--- a/tests/queries/0_stateless/00302_http_compression.sh
+++ b/tests/queries/0_stateless/00302_http_compression.sh
@@ -4,6 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
+if ! command -v gzip &> /dev/null; then echo "gzip not found" 1>&2; exit 1; fi
+if ! command -v brotli &> /dev/null; then echo "brotli not found" 1>&2; exit 1; fi
+if ! command -v xz &> /dev/null; then echo "xz not found" 1>&2; exit 1; fi
+if !
command -v zstd &> /dev/null; then echo "zstd not found" 1>&2; exit 1; fi + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10'; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=0" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10'; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; diff --git a/tests/queries/0_stateless/00341_squashing_insert_select2.sql b/tests/queries/0_stateless/00341_squashing_insert_select2.sql index 469fdaaa64a..3eb5a2682e0 100644 --- a/tests/queries/0_stateless/00341_squashing_insert_select2.sql +++ b/tests/queries/0_stateless/00341_squashing_insert_select2.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS numbers_squashed; -CREATE TABLE numbers_squashed (number UInt8) ENGINE = Memory; +CREATE TABLE numbers_squashed (number UInt8) ENGINE = StripeLog; SET min_insert_block_size_rows = 100; SET min_insert_block_size_bytes = 0; diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 0edde9f12ed..0887ecfa14e 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index 0002c43945a..c38fb2ec7d6 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -22,3 +22,5 @@ ([1.01],[1]) (['a','b'],[1,2]) (['a','ab','abc'],[3,2,1]) +([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000]) +([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000]) diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index 021aaf3cd3b..51007a9c78a 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -38,3 +38,19 @@ select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1 select sumMap(val, cnt) from ( SELECT [ CAST('a', 'FixedString(1)'), CAST('b', 'FixedString(1)' ) ] as val, [1, 2] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('abc', 'String'), CAST('ab', 'String'), CAST('a', 'String') ] as val, [1, 2, 3] as cnt ); + +DROP TABLE IF EXISTS sum_map_decimal; + +CREATE TABLE sum_map_decimal( + statusMap Nested( + goal_id UInt16, + revenue Decimal32(5) + ) +) ENGINE = Log; + +INSERT INTO sum_map_decimal VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); + +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; +SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; + +DROP TABLE sum_map_decimal; diff --git a/tests/queries/0_stateless/00506_union_distributed.sql b/tests/queries/0_stateless/00506_union_distributed.sql index 0bd4dd43ac9..3f631b8da56 100644 --- a/tests/queries/0_stateless/00506_union_distributed.sql +++ b/tests/queries/0_stateless/00506_union_distributed.sql @@ -1,6 +1,7 @@ - -- https://github.com/ClickHouse/ClickHouse/issues/1059 +SET insert_distributed_sync = 1; + DROP 
TABLE IF EXISTS union1; DROP TABLE IF EXISTS union2; DROP TABLE IF EXISTS union3; diff --git a/tests/queries/0_stateless/00515_gcd_lcm.sql b/tests/queries/0_stateless/00515_gcd_lcm.sql index c3bf3275bb8..67fab1c9d59 100644 --- a/tests/queries/0_stateless/00515_gcd_lcm.sql +++ b/tests/queries/0_stateless/00515_gcd_lcm.sql @@ -24,18 +24,18 @@ select lcm(2147483647, 2147483646); select lcm(4611686011984936962, 2147483647); select lcm(-2147483648, 1); -- test gcd float -select gcd(1280.1, 1024.1); -- { serverError 48 } -select gcd(11.1, 121.1); -- { serverError 48 } -select gcd(-256.1, 64.1); -- { serverError 48 } -select gcd(1.1, 1.1); -- { serverError 48 } -select gcd(4.1, 2.1); -- { serverError 48 } -select gcd(15.1, 49.1); -- { serverError 48 } -select gcd(255.1, 254.1); -- { serverError 48 } +select gcd(1280.1, 1024.1); -- { serverError 43 } +select gcd(11.1, 121.1); -- { serverError 43 } +select gcd(-256.1, 64.1); -- { serverError 43 } +select gcd(1.1, 1.1); -- { serverError 43 } +select gcd(4.1, 2.1); -- { serverError 43 } +select gcd(15.1, 49.1); -- { serverError 43 } +select gcd(255.1, 254.1); -- { serverError 43 } -- test lcm float -select lcm(1280.1, 1024.1); -- { serverError 48 } -select lcm(11.1, 121.1); -- { serverError 48 } -select lcm(-256.1, 64.1); -- { serverError 48 } -select lcm(1.1, 1.1); -- { serverError 48 } -select lcm(4.1, 2.1); -- { serverError 48 } -select lcm(15.1, 49.1); -- { serverError 48 } -select lcm(255.1, 254.1); -- { serverError 48 } +select lcm(1280.1, 1024.1); -- { serverError 43 } +select lcm(11.1, 121.1); -- { serverError 43 } +select lcm(-256.1, 64.1); -- { serverError 43 } +select lcm(1.1, 1.1); -- { serverError 43 } +select lcm(4.1, 2.1); -- { serverError 43 } +select lcm(15.1, 49.1); -- { serverError 43 } +select lcm(255.1, 254.1); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00597_push_down_predicate.reference b/tests/queries/0_stateless/00597_push_down_predicate.reference index cea533d6ccb..bd1c4791df4 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -114,7 +114,8 @@ FROM ( SELECT 1 AS id, - identity(cast(1, \'UInt8\')) AS subquery + identity(CAST(1, \'UInt8\')) AS subquery + WHERE subquery = 1 ) WHERE subquery = 1 1 1 diff --git a/tests/queries/0_stateless/00597_push_down_predicate.sql b/tests/queries/0_stateless/00597_push_down_predicate.sql index ea01bba9f4d..ec306ac6792 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -8,6 +8,8 @@ DROP TABLE IF EXISTS test_view_00597; CREATE TABLE test_00597(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); CREATE VIEW test_view_00597 AS SELECT * FROM test_00597; +SELECT * FROM (SELECT floor(floor(1, floor(NULL), id = 257), floor(floor(floor(floor(NULL), '10485.76', '9223372036854775807', NULL), floor(10, floor(65535, NULL), 100.0000991821289), NULL)), '2.56'), b.* FROM (SELECT floor(floor(floor(floor(NULL), 1000.0001220703125))), * FROM test_00597) AS b) WHERE id = 257; + INSERT INTO test_00597 VALUES('2000-01-01', 1, 'test string 1', 1); INSERT INTO test_00597 VALUES('2000-01-01', 2, 'test string 2', 2); diff --git a/tests/queries/0_stateless/00642_cast.reference b/tests/queries/0_stateless/00642_cast.reference index 3d5572932fb..7f5333f590e 100644 --- a/tests/queries/0_stateless/00642_cast.reference +++ b/tests/queries/0_stateless/00642_cast.reference @@ -10,11 +10,11 @@ 
hello CREATE TABLE default.cast ( `x` UInt8, - `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.reference b/tests/queries/0_stateless/00643_cast_zookeeper.reference index 658233be742..9123463de1a 100644 --- a/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,12 +1,12 @@ CREATE TABLE default.cast1 ( `x` UInt8, - `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00643/cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello 1 hello diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.sql b/tests/queries/0_stateless/00643_cast_zookeeper.sql index c52d44bd88b..c9760f00ca7 100644 --- a/tests/queries/0_stateless/00643_cast_zookeeper.sql +++ b/tests/queries/0_stateless/00643_cast_zookeeper.sql @@ -1,3 +1,5 @@ +SET database_atomic_wait_for_drop_and_detach_synchronously=1; + DROP TABLE IF EXISTS cast1; DROP TABLE IF EXISTS cast2; diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference index 035402c889d..ca27069a7df 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference @@ -18,3 +18,6 @@ c d cb db +- +61f0c404-5cb3-11e7-907b-a6006ad3dba0 61f0c404-5cb3-11e7-907b-a6006ad3dba0 61f0c404-5cb3-11e7-907b-a6006ad3dba0 +\N \N \N diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql index 98d7b7f5f8a..3ca7b482b84 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql @@ -71,3 +71,12 @@ select (toLowCardinality('a') as val) || 'b' group by val; select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val; select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val; +select '-'; +drop table if exists lc_str_uuid; +create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory; +select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid; +select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid; +insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0'); +select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid; +select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid; +drop table if exists lc_str_uuid; diff --git 
a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql index b23d8a566c8..d4d260ee92e 100644 --- a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql +++ b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql @@ -1,4 +1,6 @@ +set insert_distributed_sync = 1; set allow_suspicious_low_cardinality_types = 1; + DROP TABLE IF EXISTS test_low_null_float; DROP TABLE IF EXISTS dist_00717; diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index 5060b5253fe..e0922ad435d 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -49,3 +49,19 @@ FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF is ipv6 string: 1 ::ffff:127.0.0.1 is ipv6 string: 1 ::ffff:8.8.8.8 is ipv6 string: 1 2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: 1 +::ffff:0.0.0.0 +::ffff:127.0.0.1 +::ffff:127.0.0.1 +::ffff:127.0.0.0 +::ffff:127.0.0.1 +::ffff:127.0.0.2 +::ffff:127.0.0.3 +::ffff:127.0.0.4 +::ffff:127.0.0.5 +::ffff:127.0.0.6 +::ffff:127.0.0.7 +::ffff:127.0.0.8 +::ffff:127.0.0.9 +::ffff:127.0.0.10 +::ffff:127.0.0.11 +::ffff:127.0.0.12 diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql index 099dc20762e..5815afb1605 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql @@ -84,3 +84,9 @@ SELECT '::ffff:127.0.0.1 is ipv6 string: ', isIPv6String( SELECT '::ffff:8.8.8.8 is ipv6 string: ', isIPv6String('::ffff:8.8.8.8'); SELECT '2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: ', isIPv6String('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'); +-- IPV6 functions parse IPv4 addresses. + +SELECT toIPv6('0.0.0.0'); +SELECT toIPv6('127.0.0.1'); +SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); +SELECT toIPv6('127.0.0.' || toString(number)) FROM numbers(13); diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index 9540d566ac3..d19288f65d8 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -5,21 +5,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo "DROP TABLE IF EXISTS tab_00738; -DROP TABLE IF EXISTS mv; -CREATE TABLE tab_00738(a Int) ENGINE = Log; -CREATE MATERIALIZED VIEW mv UUID '00000738-1000-4000-8000-000000000001' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n +# there are some issues with Atomic database, let's generate it uniq +# otherwise flaky check will not pass. 
+uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase())") -${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & +echo "DROP TABLE IF EXISTS tab_00738 SYNC; +DROP TABLE IF EXISTS mv SYNC; +-- create table with fsync and 20 partitions for slower INSERT +-- (since increasing number of records will make it significantly slower in debug build, but not in release) +CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a PARTITION BY a%20 SETTINGS fsync_after_insert=1; +CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n + +${CLICKHOUSE_CLIENT} --query_id insert_$CLICKHOUSE_DATABASE --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & function drop() { - ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.00000738-1000-4000-8000-000000000001\`" -n + ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.$uuid\`" -n } function wait_for_query_to_start() { - while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; do sleep 0.001; done + while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; do sleep 0.001; done + + # The query is already started, but there is no guarantee that it locks the underlying table already. + # Wait until PushingToViewsBlockOutputStream will acquire the lock of the underlying table for the INSERT query. + # (assume that 0.5 second is enough for this, but this is not 100% correct) + sleep 0.5 + + # query already finished, fail + if [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; then + exit 2 + fi } export -f wait_for_query_to_start diff --git a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql index 15573d859bb..196dfd84c7f 100644 --- a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql +++ b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql @@ -1,3 +1,5 @@ +SET insert_distributed_sync = 1; + DROP TABLE IF EXISTS low_cardinality; DROP TABLE IF EXISTS low_cardinality_all; diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto new file mode 100644 index 00000000000..8673924c929 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +message ABC +{ + message nested + { + message nested + { + repeated int32 c = 1; + } + repeated nested b = 1; + } + repeated nested a = 1; +} \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference new file mode 100644 index 00000000000..69e7d5e1da8 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference @@ -0,0 +1,52 @@ +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] + +Binary representation: +00000000 1a 0a 00 0a 02 0a 00 0a 05 0a 03 0a 01 01 0a 0b |................| +00000010 0a 04 0a 02 02 03 0a 03 0a 01 04 12 0a 07 0a 05 |................| +00000020 0a 03 05 06 07 0a 07 0a 05 0a 03 08 09 0a |..............| +0000002e + +MESSAGE #1 AT 0x00000001 +a { +} +a { + b { + } +} +a { + b { + c: 1 + } +} +a { + b { + c: 
2 + c: 3 + } + b { + c: 4 + } +} +MESSAGE #2 AT 0x0000001C +a { + b { + c: 5 + c: 6 + c: 7 + } +} +a { + b { + c: 8 + c: 9 + c: 10 + } +} + +Binary representation is as expected + +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh new file mode 100755 index 00000000000..903217ca939 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS array_3dim_protobuf_00825; + +CREATE TABLE array_3dim_protobuf_00825 +( + `a_b_c` Array(Array(Array(Int32))) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]); + +SELECT * FROM array_3dim_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto new file mode 100644 index 00000000000..8f84164da2a --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto @@ -0,0 +1,9 @@ +syntax = "proto3"; + +message AA { + message nested_array { + repeated double c = 2; + } + string a = 1; + repeated nested_array b = 2; +} \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference new file mode 100644 index 00000000000..5ea6780a3ba --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference @@ -0,0 +1,41 @@ +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] + +Binary representation: +00000000 6b 0a 03 6f 6e 65 12 1a 12 18 00 00 00 00 00 00 |k..one..........| +00000010 f0 3f 00 00 00 00 00 00 00 40 00 00 00 00 00 00 |.?.......@......| +00000020 08 40 12 12 12 10 00 00 00 00 00 00 e0 3f 00 00 |.@...........?..| +00000030 00 00 00 00 d0 3f 12 00 12 12 12 10 00 00 00 00 |.....?..........| +00000040 00 00 10 40 00 00 00 00 00 00 14 40 12 12 12 10 |...@.......@....| +00000050 00 00 00 00 00 00 c0 3f 00 00 00 00 00 00 b0 3f |.......?.......?| +00000060 12 0a 12 08 00 00 00 00 00 00 18 40 |...........@| +0000006c + +MESSAGE #1 AT 0x00000001 +a: "one" +b { + c: 1 + c: 2 + c: 3 +} +b { + c: 0.5 + c: 0.25 +} +b { +} +b { + c: 4 + c: 5 +} +b { + c: 0.125 + c: 
0.0625 +} +b { + c: 6 +} + +Binary representation is as expected + +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh new file mode 100755 index 00000000000..0b386723091 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/9069 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +CREATE TABLE array_of_arrays_protobuf_00825 +( + `a` String, + `b` Nested ( + `c` Array(Float64) + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]); + +SELECT * FROM array_of_arrays_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto new file mode 100644 index 00000000000..ba558dbbadb --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +message Message +{ + enum Enum + { + FIRST = 0; + SECOND = 1; + TEN = 10; + HUNDRED = 100; + }; + Enum x = 1; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference new file mode 100644 index 00000000000..ef8059bac28 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference @@ -0,0 +1,31 @@ +Second +Third +First +First +Second + +Binary representation: +00000000 02 08 01 02 08 64 00 00 02 08 01 |.....d.....| +0000000b + +MESSAGE #1 AT 0x00000001 +x: SECOND +MESSAGE #2 AT 0x00000004 +x: HUNDRED +MESSAGE #3 AT 0x00000007 +MESSAGE #4 AT 0x00000008 +MESSAGE #5 AT 0x00000009 +x: SECOND + +Binary representation is as expected + +Second +Third +First +First +Second +Second +Third +First +First +Second diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh new file mode 100755 index 00000000000..cbb387a62a5 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/7438 + +CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS enum_mapping_protobuf_00825; + +CREATE TABLE enum_mapping_protobuf_00825 +( + x Enum16('First'=-100, 'Second'=0, 'Third'=100) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second'); + +SELECT * FROM enum_mapping_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.proto b/tests/queries/0_stateless/00825_protobuf_format_map.proto new file mode 100644 index 00000000000..561b409b733 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.proto @@ -0,0 +1,5 @@ +syntax = "proto3"; + +message Message { + map a = 1; +}; diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.reference b/tests/queries/0_stateless/00825_protobuf_format_map.reference new file mode 100644 index 00000000000..e3f17cb1095 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.reference @@ -0,0 +1,19 @@ +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} + +Binary representation: +00000000 0e 0a 05 0a 01 78 10 05 0a 05 0a 01 79 10 07 07 |.....x......y...| +00000010 0a 05 0a 01 7a 10 0b 0a 0a 08 0a 04 74 65 6d 70 |....z.......temp| +00000020 10 00 06 0a 04 0a 00 10 00 |.........| +00000029 + +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh new file mode 100755 index 00000000000..5df25c41750 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/6497 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +SET allow_experimental_map_type = 1; + +DROP TABLE IF EXISTS map_00825; + +CREATE TABLE map_00825 +( + a Map(String, UInt32) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0}); + +SELECT * FROM map_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +echo "Binary representation:" +hexdump -C $BINARY_FILE_PATH + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto new file mode 100644 index 00000000000..052741f504b --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto @@ -0,0 +1,10 @@ +syntax = "proto3"; + +message Repeated { + string foo = 1; + int64 bar = 2; +} + +message Message { + repeated Repeated messages = 1; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference new file mode 100644 index 00000000000..6cdd56a5b7f --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference @@ -0,0 +1,25 @@ +['1'] [0] +['1',''] [0,1] + +Binary representation: +00000000 05 0a 03 0a 01 31 09 0a 03 0a 01 31 0a 02 10 01 |.....1.....1....| +00000010 + +MESSAGE #1 AT 0x00000001 +messages { + foo: "1" +} +MESSAGE #2 AT 0x00000007 +messages { + foo: "1" +} +messages { + bar: 1 +} + +Binary representation is as expected + +['1'] [0] +['1',''] [0,1] +['1'] [0] +['1',''] [0,1] diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh new file mode 100755 index 00000000000..58ded92f2c1 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/6497 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS nested_optional_protobuf_00825; + +CREATE TABLE nested_optional_protobuf_00825 +( + messages Nested + ( + foo String, + bar Int64 + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]); + +SELECT * FROM nested_optional_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.proto b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto new file mode 100644 index 00000000000..08e6049ffe0 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message Message { + sint32 x = 1; + sint32 z = 2; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.reference b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference new file mode 100644 index 00000000000..5472f3bfa14 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference @@ -0,0 +1,37 @@ +0 0 0 +2 4 8 +3 9 27 +5 25 125 +101 102 103 + +Binary representation: +00000000 00 04 08 04 10 10 04 08 06 10 36 05 08 0a 10 fa |..........6.....| +00000010 01 06 08 ca 01 10 ce 01 |........| +00000018 + +MESSAGE #1 AT 0x00000001 +MESSAGE #2 AT 0x00000002 +x: 2 +z: 8 +MESSAGE #3 AT 0x00000007 +x: 3 +z: 27 +MESSAGE #4 AT 0x0000000C +x: 5 +z: 125 +MESSAGE #5 AT 0x00000012 +x: 101 +z: 103 + +Binary representation is as expected + +0 0 0 +0 0 0 +2 4 8 +2 4 8 +3 9 27 +3 9 27 +5 25 125 +5 25 125 +101 102 103 +101 10201 103 diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh new file mode 100755 index 00000000000..97f7769269a --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
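+# Create a table where y and z have DEFAULT expressions; the Protobuf schema carries only x and z, so y is recomputed from its DEFAULT on re-import.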
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS table_default_protobuf_00825; + +CREATE TABLE table_default_protobuf_00825 +( + x Int64, + y Int64 DEFAULT x * x, + z Int64 DEFAULT x * x * x +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5); +INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103); + +SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference index e7c8d6b1ea9..84867de2849 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -95,7 +95,7 @@ SELECT t2_00826.a, t2_00826.b FROM t1_00826 -ALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b) +ALL INNER JOIN t2_00826 ON (((a = t2_00826.a) AND (a = t2_00826.a)) AND (a = t2_00826.a)) AND (b = t2_00826.b) WHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction SELECT diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference index fc39ef13935..4db65b0b795 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference @@ -127,7 +127,7 @@ FROM ) AS `--.s` CROSS JOIN t3 ) AS `--.s` -ALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) +ALL INNER JOIN t4 ON ((a = `--t1.a`) AND (a = `--t2.a`)) AND (a = `--t3.a`) WHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) SELECT `--t1.a` AS `t1.a` FROM diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference index a389cb47a96..65fcbc257ca 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.reference +++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference @@ -23,6 +23,8 @@ join_use_nulls = 1 - \N \N - +1 1 \N \N +2 2 \N \N - 1 1 1 1 2 2 \N \N @@ -49,6 +51,8 @@ join_use_nulls = 0 - - - +1 1 0 0 +2 2 0 0 - 1 1 1 1 2 2 0 0 diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.sql b/tests/queries/0_stateless/00878_join_unexpected_results.sql index 0aef5208b26..6f6cd6e6479 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.sql +++ b/tests/queries/0_stateless/00878_join_unexpected_results.sql @@ -30,11 +30,11 @@ select * from t left 
outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; select 'join_use_nulls = 0'; set join_use_nulls = 0; @@ -58,11 +58,11 @@ select '-'; select '-'; -- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; drop table t; drop table s; diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.reference b/tests/queries/0_stateless/00945_bloom_filter_index.reference index 184aafdd568..c0c2254648e 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -211,6 +211,14 @@ 2 1 1 +2 +2 +2 +2 +1 +2 +1 +2 1 value1 1 value2 2 value3 diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index 82321a75c67..f45c4c04290 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -163,23 +163,23 @@ DROP TABLE IF EXISTS bloom_filter_lc_null_types_test; DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test; CREATE TABLE bloom_filter_array_lc_null_types_test ( - order_key Array(LowCardinality(Nullable((UInt64)))), + order_key Array(LowCardinality(Nullable(UInt64))), - i8 Array(LowCardinality(Nullable((Int8)))), - i16 Array(LowCardinality(Nullable((Int16)))), - i32 Array(LowCardinality(Nullable((Int32)))), - i64 Array(LowCardinality(Nullable((Int64)))), - u8 Array(LowCardinality(Nullable((UInt8)))), - u16 Array(LowCardinality(Nullable((UInt16)))), - u32 Array(LowCardinality(Nullable((UInt32)))), - u64 Array(LowCardinality(Nullable((UInt64)))), - f32 Array(LowCardinality(Nullable((Float32)))), - f64 Array(LowCardinality(Nullable((Float64)))), + i8 Array(LowCardinality(Nullable(Int8))), + i16 Array(LowCardinality(Nullable(Int16))), + i32 Array(LowCardinality(Nullable(Int32))), + i64 Array(LowCardinality(Nullable(Int64))), + u8 Array(LowCardinality(Nullable(UInt8))), + u16 Array(LowCardinality(Nullable(UInt16))), + u32 Array(LowCardinality(Nullable(UInt32))), + u64 Array(LowCardinality(Nullable(UInt64))), + f32 Array(LowCardinality(Nullable(Float32))), + f64 Array(LowCardinality(Nullable(Float64))), - date Array(LowCardinality(Nullable((Date)))), + date Array(LowCardinality(Nullable(Date))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), - str Array(LowCardinality(Nullable((String)))), + str Array(LowCardinality(Nullable(String))), fixed_string 
Array(LowCardinality(Nullable(FixedString(5)))), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) @@ -286,7 +286,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test; DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str; -CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality((String))), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024; +CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality(String)), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024; INSERT INTO bloom_filter_array_offsets_lc_str SELECT number AS i, if(i%2, ['value'], []) FROM system.numbers LIMIT 10000; SELECT count() FROM bloom_filter_array_offsets_lc_str WHERE has(str, 'value'); DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str; @@ -348,6 +348,16 @@ SELECT id FROM test_bf_indexOf WHERE 1 <= indexOf(ary, 'value1') ORDER BY id FOR SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') >= 2 ORDER BY id FORMAT TSV; SELECT id FROM test_bf_indexOf WHERE 2 <= indexOf(ary, 'value1') ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = toDecimal32(0, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE toDecimal128(0, 2) = indexOf(ary, 'value1') ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = '0' ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE '0' = indexOf(ary, 'value1') ORDER BY id FORMAT TSV; + +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > toDecimal32(0, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < toDecimal128(1, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > '0' ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < '1' ORDER BY id FORMAT TSV; + SELECT id, ary[indexOf(ary, 'value1')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value1')] = 'value1' ORDER BY id FORMAT TSV; SELECT id, ary[indexOf(ary, 'value2')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value2')] = 'value2' ORDER BY id FORMAT TSV; SELECT id, ary[indexOf(ary, 'value3')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value3')] = 'value3' ORDER BY id FORMAT TSV; diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py new file mode 100755 index 00000000000..27308548452 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import os +import sys +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + 
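+    # Create a live view refreshed every second and watch it through several refreshes.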
client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1" + " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'") + client1.expect(prompt) + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + client1.send('DROP TABLE test.lv') + client1.expect(prompt) + diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py new file mode 100755 index 00000000000..76b9980d1f5 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import os +import sys +import time +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT 60 AND REFRESH 1" + " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'") + client1.expect(prompt) + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + # poll until live view table is dropped + start_time = time.time() + while True: + client1.send('SELECT * FROM test.lv FORMAT JSONEachRow') + client1.expect(prompt) + if 'Table test.lv doesn\'t exist' in client1.before: + break + if time.time() - start_time > 90: + break + # check table is dropped + client1.send('DROP TABLE test.lv') + client1.expect('Table test.lv doesn\'t exist') + client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py new file mode 100755 index 00000000000..34d5db676f4 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +import os +import sys +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, 
prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send('DROP TABLE IF EXISTS test.mt') + client1.expect(prompt) + client1.send('DROP DICTIONARY IF EXISTS test.dict') + client1.expect(prompt) + + client1.send("CREATE TABLE test.mt (a Int32, b Int32) Engine=MergeTree order by tuple()") + client1.expect(prompt) + client1.send("CREATE DICTIONARY test.dict(a Int32, b Int32) PRIMARY KEY a LAYOUT(FLAT()) " + \ + "SOURCE(CLICKHOUSE(db 'test' table 'mt')) LIFETIME(1)") + client1.expect(prompt) + client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1 AS SELECT * FROM test.dict") + client1.expect(prompt) + + client2.send("INSERT INTO test.mt VALUES (1,2)") + client2.expect(prompt) + + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":"1"') + + client2.send("INSERT INTO test.mt VALUES (2,2)") + client2.expect(prompt) + client1.expect(r'"_version":"2"') + + client2.send("INSERT INTO test.mt VALUES (3,2)") + client2.expect(prompt) + client1.expect(r'"_version":"3"') + + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send('DROP DICTIONARY IF EXISTS test.dict') + client1.expect(prompt) + client1.send('DROP TABLE IF EXISTS test.mt') + client1.expect(prompt) + + + diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql index 455fab694cd..3b562801f92 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql @@ -1,8 +1,12 @@ +set insert_distributed_sync=1; + DROP TABLE IF EXISTS dist_00967; DROP TABLE IF EXISTS underlying_00967; +-- To suppress "Structure does not match (...), implicit conversion will be done." message +SET send_logs_level='error'; + CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967); --- fails for TinyLog()/MergeTree()/...
but not for Memory() CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog(); INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; diff --git a/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py b/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled similarity index 100% rename from tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py rename to tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled diff --git a/tests/queries/0_stateless/01016_uniqCombined64.sql b/tests/queries/0_stateless/01016_uniqCombined64.sql index 4720b53d15e..acf8135760a 100644 --- a/tests/queries/0_stateless/01016_uniqCombined64.sql +++ b/tests/queries/0_stateless/01016_uniqCombined64.sql @@ -5,5 +5,5 @@ -- test is just to ensure that the result is different (and to document the -- outcome). -SELECT uniqCombined(number) FROM numbers(toUInt64(1e7)); -SELECT uniqCombined64(number) FROM numbers(toUInt64(1e7)); +SELECT uniqCombined(number) FROM numbers(1e7); +SELECT uniqCombined64(number) FROM numbers(1e7); diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index bfcfec2b8ba..2ad1edae733 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -5,45 +5,45 @@ -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; SET max_memory_usage = 4000000; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements SELECT 'UInt64'; SET max_memory_usage = 4000000; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); SELECT 'K=16'; -- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements SELECT 'UInt32'; SET max_memory_usage = 2000000; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 4915200; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) 
GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements SELECT 'UInt64'; SET max_memory_usage = 2000000; -SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 4915200; -SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); SELECT 'K=18'; -- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements SELECT 'UInt32'; SET max_memory_usage = 8000000; -SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 19660800; -SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt64'; SET max_memory_usage = 8000000; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 19660800; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh index bc13e44934a..025fe51e2a9 100755 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh @@ -113,8 +113,8 @@ timeout $TIMEOUT bash -c thread7 2> /dev/null & wait $CLICKHOUSE_CLIENT -q "SELECT 'Still alive'" -$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict1" -$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict2" +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict1" +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict2" $CLICKHOUSE_CLIENT -n -q " DROP TABLE table_for_dict1; diff --git a/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/tests/queries/0_stateless/01023_materialized_view_query_context.sql index 7ec8d8fd506..351379d8b14 100644 --- 
a/tests/queries/0_stateless/01023_materialized_view_query_context.sql +++ b/tests/queries/0_stateless/01023_materialized_view_query_context.sql @@ -1,5 +1,8 @@ -- Create dictionary, since dictGet*() uses DB::Context in executeImpl() -- (To cover scope of the Context in DB::PushingToViewsBlockOutputStream::process) + +set insert_distributed_sync=1; + DROP TABLE IF EXISTS mv; DROP DATABASE IF EXISTS dict_in_01023; CREATE DATABASE dict_in_01023; diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference index 7e2f6c7ce76..8a2d7e6c61a 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.reference +++ b/tests/queries/0_stateless/01029_early_constant_folding.reference @@ -2,7 +2,7 @@ SELECT 1 WHERE 0 SELECT 1 SELECT 1 -WHERE 0 +WHERE (1 IN (0, 2)) AND (2 = (identity(CAST(2, \'UInt8\')) AS subquery)) SELECT 1 WHERE 1 IN ( ( diff --git a/tests/queries/0_stateless/01029_early_constant_folding.sql b/tests/queries/0_stateless/01029_early_constant_folding.sql index 428c3625295..6336b62e080 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.sql +++ b/tests/queries/0_stateless/01029_early_constant_folding.sql @@ -4,7 +4,7 @@ EXPLAIN SYNTAX SELECT 1 WHERE 1 = 0; EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 1, 2); -EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = (SELECT 2); +EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = ((SELECT 2) AS subquery); -- no constant folding diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference index 60a616c7187..ebdeaa10067 100644 --- a/tests/queries/0_stateless/01044_great_circle_angle.reference +++ b/tests/queries/0_stateless/01044_great_circle_angle.reference @@ -17,11 +17,11 @@ ██████████▎ ████████████▍ ██████████████▍ -████████████████▌ +████████████████▍ ██████████████████▌ -████████████████████▋ -██████████████████████▋ -████████████████████████▋ +████████████████████▌ +██████████████████████▌ +████████████████████████▌ ██████████████████████████▌ ████████████████████████████▍ ██████████████████████████████▍ diff --git a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql index 7aac720865d..318f48dc833 100644 --- a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql +++ b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql @@ -1,5 +1,7 @@ -- from https://github.com/ClickHouse/ClickHouse/issues/5142 +set insert_distributed_sync = 1; + DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS t_d; DROP TABLE IF EXISTS t_v; diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.reference b/tests/queries/0_stateless/01051_new_any_join_engine.reference index 635ae641a63..a20fea88656 100644 --- a/tests/queries/0_stateless/01051_new_any_join_engine.reference +++ b/tests/queries/0_stateless/01051_new_any_join_engine.reference @@ -29,3 +29,34 @@ anti left 3 a4 anti right 5 b6 +any left +0 a1 +1 a2 +2 a3 b1 +3 a4 +4 a5 b3 +any inner +2 a3 b1 +4 a5 b3 +any right +2 a3 b1 +2 a3 b2 +4 a5 b3 +4 a5 b4 +4 a5 b5 +5 b6 +semi left +2 a3 b1 +4 a5 b3 +semi right +2 a3 b1 +2 a3 b2 +4 a5 b3 +4 a5 b4 +4 a5 b5 +anti left +0 a1 +1 a2 +3 a4 +anti right +5 b6 diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.sql b/tests/queries/0_stateless/01051_new_any_join_engine.sql index 8662d8532d4..a687a6494b5 100644 --- 
a/tests/queries/0_stateless/01051_new_any_join_engine.sql +++ b/tests/queries/0_stateless/01051_new_any_join_engine.sql @@ -57,6 +57,29 @@ SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s; SELECT 'anti right'; SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s; +-- run queries once more time (issue #16991) + +SELECT 'any left'; +SELECT * FROM t1 ANY LEFT JOIN any_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'any inner'; +SELECT * FROM t1 ANY INNER JOIN any_inner_join j USING(x) ORDER BY x, str, s; + +SELECT 'any right'; +SELECT * FROM t1 ANY RIGHT JOIN any_right_join j USING(x) ORDER BY x, str, s; + +SELECT 'semi left'; +SELECT * FROM t1 SEMI LEFT JOIN semi_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'semi right'; +SELECT * FROM t1 SEMI RIGHT JOIN semi_right_join j USING(x) ORDER BY x, str, s; + +SELECT 'anti left'; +SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'anti right'; +SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s; + DROP TABLE t1; DROP TABLE any_left_join; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index 4ae655b1ec9..222c05ae827 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -1,3 +1,5 @@ +-- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 + DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; DROP TABLE IF EXISTS distributed_01099_a; diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference index 1a9e5685a6a..71be9c3fb5b 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference @@ -1,3 +1,4 @@ 1 2019-01-05 2020-01-10 1 +1 date_table somedict diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql index 6ad76ee5a7e..471fd7959a9 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -29,6 +29,9 @@ LIFETIME(MIN 300 MAX 360); SELECT * from somedict; +-- No dictionary columns +SELECT 1 FROM somedict; + SHOW TABLES; -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE database_for_dict; diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect new file mode 100755 index 00000000000..d592bbe1ce2 --- /dev/null +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -0,0 +1,26 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" +expect "mysql> " + +send -- "USE system;\r" +expect "Database changed" + +send -- "SELECT * FROM one;\r" +expect "| dummy |" +expect "| 0 |" +expect "1 row in set" + +send -- "quit;\r" +expect eof diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.reference 
b/tests/queries/0_stateless/01176_mysql_client_interactive.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01177_group_array_moving.reference b/tests/queries/0_stateless/01177_group_array_moving.reference new file mode 100644 index 00000000000..d74c84bb94f --- /dev/null +++ b/tests/queries/0_stateless/01177_group_array_moving.reference @@ -0,0 +1,2 @@ +[-9223372036854775808,0,-9223372036854775808,0,-9223372036854775808,0] [18446744073709551615,18446744073709551614,18446744073709551613,18446744073709551612,18446744073709551611,18446744073709551610] [0,9223372036854775807,9223372036854775805,9223372036854775805,18446744073709551612,18446744073709551610] +[-35888607147294850,-71777214294589700,-107665821441884540,-143554428589179400,-179443035736474240,-215331642883769100] [17592202821648,35184405643296,52776608464944,70368811286592,87961014108240,105553216929888] [0,1,3,3,4,6] diff --git a/tests/queries/0_stateless/01177_group_array_moving.sql b/tests/queries/0_stateless/01177_group_array_moving.sql new file mode 100644 index 00000000000..5689cd95f75 --- /dev/null +++ b/tests/queries/0_stateless/01177_group_array_moving.sql @@ -0,0 +1,4 @@ +SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1048575)(18446744073709551615), groupArrayMovingSum(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3)); +SELECT groupArrayMovingAvg(257)(-9223372036854775808), groupArrayMovingAvg(1048575)(18446744073709551615), groupArrayMovingAvg(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3)); + +SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1)(10.000100135803223, [NULL, NULL], NULL), groupArrayMovingSum(NULL)(NULL) FROM numbers(1023) FORMAT Null; diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.reference b/tests/queries/0_stateless/01178_int_field_to_decimal.reference new file mode 100644 index 00000000000..6c256ba2032 --- /dev/null +++ b/tests/queries/0_stateless/01178_int_field_to_decimal.reference @@ -0,0 +1,2 @@ +9.00000000 +10.00000000 diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.sql b/tests/queries/0_stateless/01178_int_field_to_decimal.sql new file mode 100644 index 00000000000..bbd72e57d70 --- /dev/null +++ b/tests/queries/0_stateless/01178_int_field_to_decimal.sql @@ -0,0 +1,10 @@ +select d from values('d Decimal(8, 8)', 0, 1) where d not in (-1, 0); -- { serverError 69 } +select d from values('d Decimal(8, 8)', 0, 2) where d not in (1, 0); -- { serverError 69 } +select d from values('d Decimal(9, 8)', 0, 3) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(9, 8)', 0, 4) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(18, 8)', 0, 5) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(18, 8)', 0, 6) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(26, 8)', 0, 7) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(27, 8)', 0, 8) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(27, 8)', 0, 9) where d not in (-9223372036854775808, 0); +select d from values('d Decimal(28, 8)', 0, 10) where d not in (18446744073709551615, 0); diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect 
b/tests/queries/0_stateless/01179_insert_values_semicolon.expect new file mode 100755 index 00000000000..c832be72c10 --- /dev/null +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -0,0 +1,39 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" +expect ":) " + +send -- "DROP TABLE IF EXISTS test_01179\r" +expect "Ok." + +send -- "CREATE TABLE test_01179 (date DateTime) ENGINE=Memory()\r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01')\r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01'); \r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01'); (1) \r" +expect "Cannot read data after semicolon" + +send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r" +expect "2020-01-01 00:00:00\t3" + +send -- "DROP TABLE test_01179\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.reference b/tests/queries/0_stateless/01179_insert_values_semicolon.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect new file mode 100755 index 00000000000..bc775ce2c57 --- /dev/null +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -0,0 +1,32 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" +expect ":) " + +# Make a query with syntax error +send -- "select \r" +expect "Syntax error: failed at position 7 (end of query):" +expect "Expected one of: " + +# Make another query with syntax error +send -- "CREATE TABLE t4 UUID '57f27aa5-141c-47c5-888a-9563681717f5' AS t1 (`rowNumberInAllBlocks()` UInt64, `toLowCardinality(arrayJoin(\['exchange', 'tables'\]))` LowCardinality(String)) ENGINE = MergeTree \r" +expect "Syntax error: failed at position 93 ('UInt64'):*" + +# Make a query with unmatched parentheses +send -- "select (1, 2\r" +expect "Syntax error: failed at position 8 ('('):" +expect "Unmatched parentheses: (" + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.reference b/tests/queries/0_stateless/01180_client_syntax_errors.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index db9d095fe92..ec07f4d3687 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference similarity index 100% rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.reference rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh similarity index 100% rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.sh rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh diff --git a/tests/queries/0_stateless/01235_live_view_over_distributed.sql b/tests/queries/0_stateless/01235_live_view_over_distributed.sql index dd9ff80f30e..abc628475db 100644 --- a/tests/queries/0_stateless/01235_live_view_over_distributed.sql +++ b/tests/queries/0_stateless/01235_live_view_over_distributed.sql @@ -1,3 +1,4 @@ +set insert_distributed_sync = 1; SET allow_experimental_live_view = 1; DROP TABLE IF EXISTS lv; @@ -7,7 +8,7 @@ DROP TABLE IF EXISTS visits_layer; CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate); CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits', rand()); -CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; +CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01'); INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02'); diff --git a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql index a572074de3c..de35b0c6c9d 100644 --- a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql +++ b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql @@ -1,4 +1,5 @@ SET allow_experimental_live_view = 1; +SET insert_distributed_sync = 1; DROP TABLE IF EXISTS lv; DROP TABLE IF EXISTS visits; diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 90a7611c7c7..8c900e4c208 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -18,3 +18,6 @@ yes 'SELECT 1' 2>/dev/null | { } | grep -x -c 1 wait + +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference index 096d5703292..72a41ac1d84 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference @@ -2,3 +2,7 @@ [0,1,2,3,4,5,6,7,8,9,10,11,12] 20 0.49237 +78 +[0,1,2,3,4,5,6,7,8,9,10,11,12] +20 +0.49237 diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql index f851e64dbcb..f95d2d87b8e 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql +++ 
b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql @@ -1,3 +1,12 @@ +SET distributed_aggregation_memory_efficient = 1; + +SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM remote('127.0.0.{1,2}', numbers(1000))); + +SET distributed_aggregation_memory_efficient = 0; + SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); diff --git a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql index a35dc7cca56..8799680125f 100644 --- a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql +++ b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql @@ -1,3 +1,5 @@ +set insert_distributed_sync = 1; + DROP TABLE IF EXISTS visits; DROP TABLE IF EXISTS visits_dist; diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference index ad20d38f2e6..7fe00709dee 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -1,20 +1,26 @@ +ttl_01280_1 1 1 0 4 1 2 3 7 1 3 0 5 2 1 0 1 2 1 20 1 +ttl_01280_2 1 1 [0,2,3] 4 1 1 [5,4,1] 13 1 3 [1,0,1,0] 17 2 1 [3,1,0,3] 8 3 1 [2,4,5] 8 +ttl_01280_3 1 1 0 4 -1 3 10 6 +1 1 10 6 2 1 0 3 -3 5 8 2 +3 1 8 2 +ttl_01280_4 1 1 0 4 -3 3 13 9 +10 2 13 9 +ttl_01280_5 1 2 7 5 2 3 6 5 -1 2 3 5 -2 3 3 5 +ttl_01280_6 +1 5 3 5 +2 10 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh index 5ca79951a46..9f30c7c5872 100755 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh @@ -14,6 +14,7 @@ function optimize() done } +echo "ttl_01280_1" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_1 (a Int, b Int, x Int, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second delete where x % 10 == 0 and y > 5; insert into ttl_01280_1 values (1, 1, 0, 4, now() + 10); @@ -30,6 +31,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_1 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_2" +echo "ttl_01280_2" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_2 (a Int, b Int, x Array(Int32), y Double, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set x = minForEach(x), y = sum(y), d = max(d); insert into ttl_01280_2 values (1, 1, array(0, 2, 3), 4, now() + 10); @@ -48,8 +50,9 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_2 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_3" +echo "ttl_01280_3" $CLICKHOUSE_CLIENT -n --query " -create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d); +create 
table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), x = argMax(x, d), y = argMax(y, d), d = max(d); insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10); insert into ttl_01280_3 values (1, 1, 10, 6, now() + 1); insert into ttl_01280_3 values (1, 2, 3, 7, now()); @@ -66,6 +69,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_3 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_4" +echo "ttl_01280_4" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_4 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), -(a + b)) ttl d + interval 1 second group by toDate(d) set x = sum(x), y = max(y); insert into ttl_01280_4 values (1, 1, 0, 4, now() + 10); @@ -80,7 +84,8 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5" -$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x); +echo "ttl_01280_5" +$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x), b = argMax(b, -b); insert into ttl_01280_5 values (1, 2, 3, 5, now()); insert into ttl_01280_5 values (2, 10, 1, 5, now()); insert into ttl_01280_5 values (2, 3, 5, 5, now()); @@ -92,6 +97,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_5 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_6" +echo "ttl_01280_6" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_6 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a; insert into ttl_01280_6 values (1, 2, 3, 5, now()); diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql index f2c26a3d495..b273e065bcc 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql +++ b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql @@ -1,7 +1,4 @@ create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by x set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by b set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b, x set y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set y = max(y), y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a) ttl d + interval 1 second group by toDate(d), a set d = min(d), b = max(b); -- { serverError 450} -create 
table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (d, -(a + b)) ttl d + interval 1 second group by d, -(a + b) set a = sum(a), b = min(b); -- { serverError 450} diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index 285e2ab8dad..4667c76cb60 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -33,7 +33,7 @@ function execute_group_by() "--max_memory_usage_for_user="$((150<<20)) "--max_threads=2" ) - execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(toUInt64(1e6)) GROUP BY number % 5e5' + execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(1e6) GROUP BY number % 5e5' } # This is needed to keep at least one running query for user for the time of test. @@ -42,3 +42,6 @@ execute_group_by # if memory accounting will be incorrect, the second query will be failed with MEMORY_LIMIT_EXCEEDED execute_group_by wait + +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index b637f4f3296..375d67346be 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -57,7 +57,10 @@ q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] [] q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] [] q4_01297 local directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits -q2_01297 5259492 0 100 11 1000 10000 1001 10001 2.5 -q3_01297 5259492 0 \N \N 1002 \N \N \N \N -q3_01297 15778476 0 100 11 \N \N \N \N \N -q4_01297 604800 0 \N \N \N \N \N \N \N +q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 +q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N +q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N +q4_01297 604800 0 \N \N \N \N \N \N \N \N \N +-- query_selects query_inserts +CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297 +CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297 diff --git a/tests/queries/0_stateless/01297_create_quota.sql b/tests/queries/0_stateless/01297_create_quota.sql index a3fb8331e16..7d55b95601f 100644 --- a/tests/queries/0_stateless/01297_create_quota.sql +++ b/tests/queries/0_stateless/01297_create_quota.sql @@ -125,5 +125,13 @@ SELECT '-- system.quota_limits'; SELECT * FROM system.quota_limits WHERE quota_name LIKE 'q%\_01297' ORDER BY quota_name, duration; DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297; +SELECT '-- query_selects query_inserts'; +CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297; +CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +DROP QUOTA q1_01297, q2_01297; + DROP ROLE r1_01297; DROP USER u1_01297; + diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 9a554ead776..596e90adfd6 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -1,7 +1,7 @@ DROP TABLE IF 
EXISTS lc_nullable; CREATE TABLE lc_nullable ( - order_key Array(LowCardinality(Nullable((UInt64)))), + order_key Array(LowCardinality(Nullable(UInt64))), i8 Array(LowCardinality(Nullable(Int8))), i16 Array(LowCardinality(Nullable(Int16))), @@ -14,10 +14,10 @@ CREATE TABLE lc_nullable ( f32 Array(LowCardinality(Nullable(Float32))), f64 Array(LowCardinality(Nullable(Float64))), - date Array(LowCardinality(Nullable((Date)))), + date Array(LowCardinality(Nullable(Date))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), - str Array(LowCardinality(Nullable((String)))), + str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))) ) ENGINE = MergeTree() ORDER BY order_key; diff --git a/tests/queries/0_stateless/01443_merge_truncate_long.reference b/tests/queries/0_stateless/01443_merge_truncate_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01443_merge_truncate.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh similarity index 100% rename from tests/queries/0_stateless/01443_merge_truncate.sh rename to tests/queries/0_stateless/01443_merge_truncate_long.sh diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 24667f26363..bf1d5b31682 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -ue +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql index de470fe6a57..5b59bc065dd 100644 --- a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql @@ -1,7 +1,9 @@ SET max_insert_threads = 1, max_threads = 100, min_insert_block_size_rows = 1048576, max_block_size = 65536; -CREATE TEMPORARY TABLE t (x UInt64); +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt64) ENGINE = StripeLog; -- For trivial INSERT SELECT, max_threads is lowered to max_insert_threads and max_block_size is changed to min_insert_block_size_rows. INSERT INTO t SELECT * FROM numbers_mt(1000000); SET max_threads = 1; -- If data was inserted by more threads, we will probably see data out of order. SELECT DISTINCT blockSize(), runningDifference(x) FROM t; +DROP TABLE t; diff --git a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql index 1c9c1e1ef44..9399f06220b 100644 --- a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql +++ b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql @@ -18,6 +18,9 @@ DROP TABLE tmp; DETACH DATABASE test_01457; ATTACH DATABASE test_01457; +-- To suppress "Structure does not match (...), implicit conversion will be done." 
message +SET send_logs_level='error'; + CREATE TABLE tmp (n Int8) ENGINE=Memory; INSERT INTO test_01457.tf_remote_explicit_structure VALUES ('42'); SELECT * FROM tmp; diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index ce85dd72abf..16832c4fc59 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -61,3 +61,8 @@ SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT m.% FROM %t_map%')) AND event_time > now() - INTERVAL 10 SECOND AND current_database = currentDatabase(); + +DROP TABLE t_arr; +DROP TABLE t_nul; +DROP TABLE t_tup; +DROP TABLE t_map; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_2.sql b/tests/queries/0_stateless/01475_read_subcolumns_2.sql index b8959cf27f7..e827d6c360a 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns_2.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns_2.sql @@ -12,7 +12,7 @@ CREATE TABLE subcolumns arr2 Array(Array(Nullable(String))), lc LowCardinality(String), nested Nested(col1 String, col2 Nullable(UInt32)) -) +) ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = '10M'; INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]); @@ -37,7 +37,7 @@ CREATE TABLE subcolumns arr2 Array(Array(Nullable(String))), lc LowCardinality(String), nested Nested(col1 String, col2 Nullable(UInt32)) -) +) ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]); @@ -47,3 +47,5 @@ SELECT t.a.size0, t.a.null, t.u, t.s, t.s.null FROM subcolumns; SELECT sumArray(arr.null), sum(arr.size0) FROM subcolumns; SELECT arr2, arr2.size0, arr2.size1, arr2.null FROM subcolumns; -- SELECT nested.col1, nested.col2, nested.size0, nested.size0, nested.col2.null FROM subcolumns; + +DROP TABLE subcolumns; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_3.sql b/tests/queries/0_stateless/01475_read_subcolumns_3.sql index 66bcd7dbc91..54598f19bdc 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns_3.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns_3.sql @@ -37,3 +37,5 @@ SELECT count() FROM map_subcolumns PREWHERE has(m.keys, 'b'); SELECT id, m.size0 FROM map_subcolumns; SELECT count() FROM map_subcolumns WHERE m.size0 > 2; + +DROP TABLE map_subcolumns; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 684d65ceb25..be22b1b4185 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -23,3 +23,5 @@ for engine in "${ENGINES[@]}"; do $CLICKHOUSE_CLIENT --query "SELECT * FROM subcolumns" $CLICKHOUSE_CLIENT --query "SELECT n, n.null, a1, a1.size0, a2, a2.size0, a2.size1, a2.size2, a3, a3.size0, a3.null, t, t.s, t.v, m, m.keys, m.values FROM subcolumns" done + +$CLICKHOUSE_CLIENT -q "DROP TABLE subcolumns" diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference new file mode 100644 index 00000000000..f8f36434a82 --- /dev/null +++ 
b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference @@ -0,0 +1,4 @@ +2020-01-01 00:00:00 3 +2020-01-01 00:00:00 2020-01-01 00:00:00 111 +1 +0 diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql new file mode 100644 index 00000000000..7a0fb86330b --- /dev/null +++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY toStartOfHour(timestamp) +SET bytes=max(bytes); + +INSERT INTO derived_metrics_local values('2020-01-01 00:00:00', 1); +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 3); +INSERT INTO derived_metrics_local values('2020-01-01 00:02:00', 2); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT * FROM derived_metrics_local; + +DROP TABLE derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + timestamp_h DateTime materialized toStartOfHour(timestamp), + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (timestamp_h, timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY timestamp_h +SET bytes=max(bytes), timestamp = toStartOfHour(any(timestamp)); + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT timestamp, timestamp_h, bytes FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +) +ENGINE=MergeTree() +ORDER BY (toStartOfHour(timestamp), timestamp) +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111) ('2020-01-01 00:19:22', 22) ('2100-01-01 00:19:22', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT sum(bytes) FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=MergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT count() FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; diff --git a/tests/queries/0_stateless/01508_partition_pruning.reference b/tests/queries/0_stateless/01508_partition_pruning.reference deleted file mode 100644 index 0cc40d23b41..00000000000 --- a/tests/queries/0_stateless/01508_partition_pruning.reference +++ /dev/null @@ -1,244 +0,0 @@ ---------- tMM ---------------------------- -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() 
from tMM where toDate(d)=toDate('2020-09-01'); -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -3 15000 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; -2 6440 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; -2 6440 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; -2 10000 -Selected 2 parts by partition key, 2 parts by primary 
key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -3 9999 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -2 9999 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; -2 20000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - ---------- tDD ---------------------------- -select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 
marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges - -select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges - ---------- sDD ---------------------------- -select uniqExact(_part), count() from sDD; -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; -3 9999 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; -2 9999 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; -3 10001 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - ---------- xMM ---------------------------- -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; -3 10001 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; -1 1 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 
00:00:00' and a<>3; -2 5001 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -3 15000 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where a = 66; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from xMM where a = 2; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -2 15000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -5 30000 -Selected 5 parts by partition key, 5 parts by primary key, 5 marks by primary key, 5 marks to read from 5 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - diff --git a/tests/queries/0_stateless/01508_partition_pruning.queries b/tests/queries/0_stateless/01508_partition_pruning_long.queries similarity index 100% rename from tests/queries/0_stateless/01508_partition_pruning.queries rename to tests/queries/0_stateless/01508_partition_pruning_long.queries diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference new file mode 100644 index 00000000000..70f529c6058 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long.reference @@ -0,0 +1,244 @@ +--------- tMM ---------------------------- +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +1 
1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 
00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +2 9999 +Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +1 10000 +Selected 1/3 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +2 20000 +Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +--------- tDD ---------------------------- +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= 
'2020-09-23' and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges + +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges + +--------- sDD ---------------------------- +select uniqExact(_part), count() from sDD; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; +2 9999 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +--------- xMM ---------------------------- +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +1 1 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/6 parts by partition key, 
2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where a = 66; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from xMM where a = 2; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +2 15000 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +1 10000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +5 30000 +Selected 5/5 parts by partition key, 5 parts by primary key, 5/10 marks by primary key, 5 marks to read from 5 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + diff --git a/tests/queries/0_stateless/01508_partition_pruning.sh b/tests/queries/0_stateless/01508_partition_pruning_long.sh similarity index 88% rename from tests/queries/0_stateless/01508_partition_pruning.sh rename to tests/queries/0_stateless/01508_partition_pruning_long.sh index b5ec6388d5c..1b3c524ac77 100755 --- a/tests/queries/0_stateless/01508_partition_pruning.sh +++ b/tests/queries/0_stateless/01508_partition_pruning_long.sh @@ -4,8 +4,8 @@ # Description of test result: # Test the correctness of the partition # pruning -# -# Script executes queries from a file 01508_partition_pruning.queries (1 line = 1 query) +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) # Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug #------------------------------------------------------------------------------------------- @@ -18,7 +18,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) #export CURDIR=. 
-queries="${CURDIR}/01508_partition_pruning.queries" +queries="${CURDIR}/01508_partition_pruning_long.queries" while IFS= read -r sql do [ -z "$sql" ] && continue @@ -30,9 +30,7 @@ do ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') echo "" - else + else ${CLICKHOUSE_CLIENT} --query "$sql" - fi + fi done < "$queries" - - diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql similarity index 92% rename from tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql rename to tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 6aa38a914f7..87c66609421 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -1,7 +1,7 @@ drop table if exists data_01513; create table data_01513 (key String) engine=MergeTree() order by key; -- 10e3 groups, 1e3 keys each -insert into data_01513 select number%10e3 from numbers(toUInt64(2e6)); +insert into data_01513 select number%10e3 from numbers(2e6); -- reduce number of parts to 1 optimize table data_01513 final; diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference index 4c85a1d418a..a3f2106cd5f 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference @@ -1,6 +1,9 @@ -2000-01-01 00:00:00 0 -2020-01-01 00:00:00 0 -2000-01-01 00:00:00 1 -2020-01-01 00:00:00 1 -2000-01-01 00:00:00 2 -2020-01-01 00:00:00 2 +2000-01-01 00:00:00 0 +2020-01-01 00:00:00 0 +2000-01-01 00:00:00 1 +2020-01-01 00:00:00 1 +2000-01-01 00:00:00 2 +2020-01-01 00:00:00 2 +1 +499999 +5 diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index d332946605d..25c47c008bd 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,15 +1,40 @@ DROP TABLE IF EXISTS select_final; -CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x; +SET do_not_merge_across_partitions_select_final = 1; -INSERT INTO select_final SELECT toDate('2000-01-01'), number FROM numbers(2); -INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1 FROM numbers(2); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); -INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2); -INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2); +INSERT INTO select_final SELECT 
toDate('2000-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); + +INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1, '' FROM numbers(2); -SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1; +SELECT * FROM select_final FINAL ORDER BY x; + +TRUNCATE TABLE select_final; + +INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2000-01-01'), number, 'updated' FROM numbers(2); + +OPTIMIZE TABLE select_final FINAL; + +INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2020-01-01'), number, 'updated' FROM numbers(2); + +SELECT max(x) FROM select_final FINAL where string = 'updated'; + +TRUNCATE TABLE select_final; + +INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(500000); + +OPTIMIZE TABLE select_final FINAL; + +SELECT max(x) FROM select_final FINAL; + +SYSTEM FLUSH LOGS; + +SELECT length(thread_ids) FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' AND current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1; DROP TABLE select_final; - diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh index e77764ee34e..f9d739b57cd 100755 --- a/tests/queries/0_stateless/01526_initial_query_id.sh +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -ue +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 6374d6fca21..38c80617334 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -8,7 +8,7 @@ CREATE TABLE nested col2 Nested(a UInt32, n Nested(s String, b UInt32)), col3 Nested(n1 Nested(a UInt32, b UInt32), n2 Nested(s String, t String)) ) -ENGINE = MergeTree +ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; @@ -55,7 +55,7 @@ CREATE TABLE nested id UInt32, col1 Nested(a UInt32, n Nested(s String, b UInt32)) ) -ENGINE = MergeTree +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; @@ -64,3 +64,5 @@ SELECT id % 10, sum(length(col1)), sumArray(arrayMap(x -> length(x), col1.n.b)) SELECT arraySum(col1.a), arrayMap(x -> x * x * 2, col1.a) FROM nested ORDER BY id LIMIT 5; SELECT untuple(arrayJoin(arrayJoin(col1.n))) FROM nested ORDER BY id LIMIT 10 OFFSET 10; + +DROP TABLE nested; diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference similarity index 100% rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.reference rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh similarity index 94% rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.sh rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh index c81bd1a6ce4..32877bfd0fe 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh @@ -66,4 +66,7 @@ echo 'OK' ${CLICKHOUSE_CLIENT} --query "DROP USER test_01541"; +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" + exit 0 diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference index 6f71b6263c0..443b90b80a5 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference @@ -3,6 +3,7 @@ Invocation with constant 1858-11-17 2020-11-01 \N +\N or null 2020-11-01 \N diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql index 4e50351d191..5e682a942d5 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql @@ -5,6 +5,7 @@ SELECT fromModifiedJulianDay(-1); SELECT fromModifiedJulianDay(0); SELECT fromModifiedJulianDay(59154); SELECT fromModifiedJulianDay(NULL); +SELECT fromModifiedJulianDay(CAST(NULL, 'Nullable(Int64)')); SELECT fromModifiedJulianDay(-678942); -- { serverError 490 } SELECT fromModifiedJulianDay(2973484); -- { serverError 490 } diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh index d7ee2840763..8c4900043d0 100755 --- a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh +++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" 
&& pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv +yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv -$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')" \ No newline at end of file +$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')" diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index feca2cae5ea..a1a1814a581 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -23,7 +23,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) SELECT timestamp, key @@ -37,7 +37,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) SELECT timestamp, key diff --git a/tests/queries/0_stateless/01564_test_hint_woes.reference b/tests/queries/0_stateless/01564_test_hint_woes.reference index 892ca733d7c..9ce4572eab4 100644 --- a/tests/queries/0_stateless/01564_test_hint_woes.reference +++ b/tests/queries/0_stateless/01564_test_hint_woes.reference @@ -29,3 +29,5 @@ INSERT INTO t0(c0, c1) VALUES ("1",1) ; -- { clientError 47 } INSERT INTO t0(c0, c1) VALUES ('1', 1) ; -- the return code must be zero after the final query has failed with expected error insert into values_01564 values (11); -- { serverError 469 } +drop table t0; +drop table values_01564; diff --git a/tests/queries/0_stateless/01564_test_hint_woes.sql b/tests/queries/0_stateless/01564_test_hint_woes.sql index ec2c319e8d1..fee85130b03 100644 --- a/tests/queries/0_stateless/01564_test_hint_woes.sql +++ b/tests/queries/0_stateless/01564_test_hint_woes.sql @@ -49,3 +49,6 @@ INSERT INTO t0(c0, c1) VALUES ('1', 1) ; -- the return code must be zero after the final query has failed with expected error insert into values_01564 values (11); -- { serverError 469 } + +drop table t0; +drop table values_01564; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 679695dd6db..334ebc7eb1f 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -35,18 +35,18 @@ Expression (Projection) Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) Expression (Projection) Limit (preliminary LIMIT) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) 
Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating @@ -58,17 +58,17 @@ Expression ((Projection + Before ORDER BY)) Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) second-index 1 1 diff --git a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql index b9c36a9cecc..cffeb73b1ca 100644 --- a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql +++ b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql @@ -3,5 +3,5 @@ SET max_bytes_before_external_group_by = 200000000; SET max_memory_usage = 1500000000; SET max_threads = 12; -SELECT bitAnd(number, pow(2, 20) - 1) as k, argMaxIf(k, number % 2 = 0 ? number : Null, number > 42), uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null; +SELECT bitAnd(number, toUInt64(pow(2, 20) - 1)) as k, argMaxIf(k, number % 2 = 0 ? 
number : Null, number > 42), uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null; diff --git a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql index 4b613b6d7ce..28507e25fd4 100644 --- a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql +++ b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql @@ -9,3 +9,5 @@ INSERT INTO low_card VALUES ( '1' ); SELECT * FROM low_card; SELECT * FROM low_card WHERE lc = '1'; SELECT CAST(lc AS String) FROM low_card; + +DROP TABLE low_card; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 45cb4ac3994..d2543f0db75 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -2,7 +2,7 @@ set allow_experimental_window_functions = 1; -- just something basic -select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); +select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); 0 1 1 2 2 3 @@ -14,7 +14,7 @@ select number, count() over (partition by intDiv(number, 3) order by number) fro 8 3 9 1 -- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; 2 2 1 2 0 2 @@ -26,9 +26,9 @@ select number, max(number) over (partition by intDiv(number, 3) order by number 6 8 9 9 -- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } +select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } -- no partition by -select number, avg(number) over (order by number) from numbers(10); +select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); 0 0 1 0.5 2 1 @@ -40,7 +40,7 @@ select number, avg(number) over (order by number) from numbers(10); 8 4 9 4.5 -- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); 0 0 1 1 2 1 @@ -52,7 +52,7 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) from 8 7 9 9 -- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); 0 0 1 1 2 1 @@ -65,14 +65,14 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro 9 9 -- can't reference it yet -- the window functions are calculated at the -- last stage of select, after all other functions. 
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } +select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } -- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) where c > 0; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; 1 2 3 -- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; 9 9 6 8 7 8 @@ -84,14 +84,14 @@ select number, max(number) over (partition by intDiv(number, 3) order by number 1 2 2 2 -- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) order by c; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; 1 2 3 -- Example with window function only in ORDER BY. Here we make a rank of all -- numbers sorted descending, and then sort by this rank descending, and must get -- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc; +select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; 0 1 2 @@ -100,23 +100,23 @@ select * from (select * from numbers(5) order by rand()) order by count() over ( -- Aggregate functions as window function arguments. This query is semantically -- the same as the above one, only we replace `number` with -- `any(number) group by number` and so on. 
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; 0 1 2 3 4 -- some more simple cases w/aggregate functions -select sum(any(number)) over () from numbers(1); +select sum(any(number)) over (rows unbounded preceding) from numbers(1); 0 -select sum(any(number) + 1) over () from numbers(1); +select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); 1 -select sum(any(number + 1)) over () from numbers(1); +select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); 1 -- different windows -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; 0 2 1 1 2 2 2 2 3 @@ -151,7 +151,7 @@ select number, max(number) over (partition by intDiv(number, 3) order by number -- two functions over the same window -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; 0 2 3 1 2 2 2 2 1 @@ -163,22 +163,26 @@ select number, max(number) over (partition by intDiv(number, 3) order by number select median(x) over (partition by x) from (select 1 x); 1 -- an empty window definition is valid as well -select groupArray(number) over () from numbers(3); +select groupArray(number) over (rows unbounded preceding) from numbers(3); [0] [0,1] [0,1,2] +select groupArray(number) over () from numbers(3); +[0,1,2] +[0,1,2] +[0,1,2] -- This one tests we properly process the window function arguments. -- Seen errors like 'column `1` not found' from count(1). -select count(1) over (), max(number + 1) over () from numbers(3); +select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); 1 3 -- Should work in DISTINCT -select distinct sum(0) over () from numbers(2); +select distinct sum(0) over (rows unbounded preceding) from numbers(2); 0 -select distinct any(number) over () from numbers(2); +select distinct any(number) over (rows unbounded preceding) from numbers(2); 0 -- Various kinds of aliases are properly substituted into various parts of window -- function definition. 
-with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7); +with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); 0 1 0 3 0 6 @@ -192,8 +196,8 @@ select 1 window w1 as (); select sum(number) over w1, sum(number) over w2 from numbers(10) window - w1 as (), - w2 as (partition by intDiv(number, 3)) + w1 as (rows unbounded preceding), + w2 as (partition by intDiv(number, 3) rows unbounded preceding) ; 0 0 1 1 @@ -205,12 +209,14 @@ window 28 13 36 21 45 9 +-- FIXME both functions should use the same window, but they don't. Add an +-- EXPLAIN test for this. select sum(number) over w1, - sum(number) over (partition by intDiv(number, 3)) + sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) window - w1 as (partition by intDiv(number, 3)) + w1 as (partition by intDiv(number, 3) rows unbounded preceding) ; 0 0 1 1 @@ -222,3 +228,768 @@ window 13 13 21 21 9 9 +-- RANGE frame +-- It's the default +select sum(number) over () from numbers(3); +3 +3 +3 +-- Try some mutually prime sizes of partition, group and block, for the number +-- of rows that is their least common multiple + 1, so that we see all the +-- interesting corner cases. +select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 2 +1 0 1 3 +2 0 0 2 +3 1 1 3 +4 1 0 1 +5 1 1 3 +6 2 0 2 +7 2 1 3 +8 2 0 2 +9 3 1 3 +10 3 0 1 +11 3 1 3 +12 4 0 2 +13 4 1 3 +14 4 0 2 +15 5 1 3 +16 5 0 1 +17 5 1 3 +18 6 0 2 +19 6 1 3 +20 6 0 2 +21 7 1 3 +22 7 0 1 +23 7 1 3 +24 8 0 2 +25 8 1 3 +26 8 0 2 +27 9 1 3 +28 9 0 1 +29 9 1 3 +30 10 0 1 +select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 2 +1 0 1 4 +2 0 2 5 +3 0 0 2 +4 0 1 4 +5 1 2 5 +6 1 0 2 +7 1 1 3 +8 1 2 5 +9 1 0 2 +10 2 1 3 +11 2 2 5 +12 2 0 1 +13 2 1 3 +14 2 2 5 +15 3 0 2 +16 3 1 4 +17 3 2 5 +18 3 0 2 +19 3 1 4 +20 4 2 5 +21 4 0 2 +22 4 1 3 +23 4 2 5 +24 4 0 2 +25 5 1 3 +26 5 2 5 +27 5 0 1 +28 5 1 3 +29 5 2 5 +30 6 0 1 +select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; +0 0 0 3 +1 0 1 5 +2 0 0 3 +3 0 1 5 +4 0 0 3 +5 1 1 5 +6 1 0 2 +7 1 1 5 +8 1 0 2 +9 1 1 5 +10 2 0 3 +11 2 1 5 +12 2 0 3 +13 2 1 5 +14 2 0 3 +15 3 1 5 +16 3 0 2 +17 3 1 5 +18 3 0 2 +19 3 1 5 +20 4 0 3 +21 4 1 5 +22 4 0 3 +23 4 1 5 +24 4 0 3 +25 5 1 5 +26 5 0 2 +27 5 1 5 +28 5 0 2 +29 5 1 5 +30 6 0 1 +select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 1 +1 0 1 2 +2 0 2 3 +3 1 3 2 +4 1 4 3 +5 1 0 1 +6 2 1 1 +7 2 2 2 +8 2 3 3 +9 3 4 3 +10 3 0 1 +11 3 1 2 +12 4 2 1 +13 4 3 2 +14 4 4 3 +15 5 0 1 +16 5 1 2 +17 5 2 3 +18 6 3 2 +19 6 4 3 +20 6 0 1 +21 7 1 1 +22 7 2 2 +23 7 3 3 +24 8 4 3 +25 8 0 1 +26 8 1 2 +27 9 2 1 +28 9 3 2 +29 9 4 3 +30 10 0 1 +select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by 
number +settings max_block_size = 3 +; +0 0 0 1 +1 0 1 2 +2 1 2 1 +3 1 3 2 +4 2 4 2 +5 2 0 1 +6 3 1 1 +7 3 2 2 +8 4 3 1 +9 4 4 2 +10 5 0 1 +11 5 1 2 +12 6 2 1 +13 6 3 2 +14 7 4 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 3 1 +19 9 4 2 +20 10 0 1 +21 10 1 2 +22 11 2 1 +23 11 3 2 +24 12 4 2 +25 12 0 1 +26 13 1 1 +27 13 2 2 +28 14 3 1 +29 14 4 2 +30 15 0 1 +select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 1 +1 0 1 2 +2 1 2 2 +3 1 0 1 +4 2 1 1 +5 2 2 2 +6 3 0 1 +7 3 1 2 +8 4 2 2 +9 4 0 1 +10 5 1 1 +11 5 2 2 +12 6 0 1 +13 6 1 2 +14 7 2 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 0 1 +19 9 1 2 +20 10 2 2 +21 10 0 1 +22 11 1 1 +23 11 2 2 +24 12 0 1 +25 12 1 2 +26 13 2 2 +27 13 0 1 +28 14 1 1 +29 14 2 2 +30 15 0 1 +-- A case where the partition end is in the current block, and the frame end +-- is triggered by the partition end. +select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); +0 +0 +0 +3 +3 +3 +6 +6 +6 +9 +-- UNBOUNDED FOLLOWING frame end +select + min(number) over wa, min(number) over wo, + max(number) over wa, max(number) over wo +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(31)) +window + wa as (partition by p order by o + range between unbounded preceding and unbounded following), + wo as (partition by p order by o + rows between unbounded preceding and unbounded following) +settings max_block_size = 2; +0 0 2 2 +0 0 2 2 +0 0 2 2 +3 3 5 5 +3 3 5 5 +3 3 5 5 +6 6 8 8 +6 6 8 8 +6 6 8 8 +9 9 11 11 +9 9 11 11 +9 9 11 11 +12 12 14 14 +12 12 14 14 +12 12 14 14 +15 15 17 17 +15 15 17 17 +15 15 17 17 +18 18 20 20 +18 18 20 20 +18 18 20 20 +21 21 23 23 +21 21 23 23 +21 21 23 23 +24 24 26 26 +24 24 26 26 +24 24 26 26 +27 27 29 29 +27 27 29 29 +27 27 29 29 +30 30 30 30 +-- ROWS offset frame start +select number, p, + count(*) over (partition by p order by number + rows between 1 preceding and unbounded following), + count(*) over (partition by p order by number + rows between current row and unbounded following), + count(*) over (partition by p order by number + rows between 1 following and unbounded following) +from (select number, intDiv(number, 5) p from numbers(31)) +order by p, number +settings max_block_size = 2; +0 0 5 5 4 +1 0 5 4 3 +2 0 4 3 2 +3 0 3 2 1 +4 0 2 1 0 +5 1 5 5 4 +6 1 5 4 3 +7 1 4 3 2 +8 1 3 2 1 +9 1 2 1 0 +10 2 5 5 4 +11 2 5 4 3 +12 2 4 3 2 +13 2 3 2 1 +14 2 2 1 0 +15 3 5 5 4 +16 3 5 4 3 +17 3 4 3 2 +18 3 3 2 1 +19 3 2 1 0 +20 4 5 5 4 +21 4 5 4 3 +22 4 4 3 2 +23 4 3 2 1 +24 4 2 1 0 +25 5 5 5 4 +26 5 5 4 3 +27 5 4 3 2 +28 5 3 2 1 +29 5 2 1 0 +30 6 1 1 0 +-- ROWS offset frame start and end +select number, p, + count(*) over (partition by p order by number + rows between 2 preceding and 2 following) +from (select number, intDiv(number, 7) p from numbers(71)) +order by p, number +settings max_block_size = 2; +0 0 3 +1 0 4 +2 0 5 +3 0 5 +4 0 5 +5 0 4 +6 0 3 +7 1 3 +8 1 4 +9 1 5 +10 1 5 +11 1 5 +12 1 4 +13 1 3 +14 2 3 +15 2 4 +16 2 5 +17 2 5 +18 2 5 +19 2 4 +20 2 3 +21 3 3 +22 3 4 +23 3 5 +24 3 5 +25 3 5 +26 3 4 +27 3 3 +28 4 3 +29 4 4 +30 4 5 +31 4 5 +32 4 5 +33 4 4 +34 4 3 +35 5 3 +36 5 4 +37 5 5 +38 5 5 +39 5 5 +40 5 4 +41 5 3 +42 6 3 +43 6 4 +44 6 5 +45 6 5 +46 6 5 +47 6 4 +48 6 3 +49 7 3 +50 7 4 +51 7 5 +52 7 5 +53 7 5 +54 7 4 +55 7 3 +56 8 3 +57 8 4 +58 8 5 +59 8 5 +60 8 5 +61 8 4 +62 8 3 +63 9 3 +64 9 4 +65 9 5 +66 9 5 +67 9 5 +68 9 4 +69 9 3 
+70 10 1 +SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); +1 +2 +3 +3 +-- frame boundaries that runs into the partition end +select + count() over (partition by intDiv(number, 3) + rows between 100 following and unbounded following), + count() over (partition by intDiv(number, 3) + rows between current row and 100 following) +from numbers(10); +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 1 +-- seen a use-after-free under MSan in this query once +SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; +-- a corner case +select count() over (); +1 +-- RANGE CURRENT ROW frame start +select number, p, o, + count(*) over (partition by p order by o + range between current row and unbounded following) +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31)) +order by p, o, number +settings max_block_size = 2; +0 0 0 5 +3 0 0 5 +1 0 1 3 +4 0 1 3 +2 0 2 1 +6 1 0 5 +9 1 0 5 +7 1 1 3 +5 1 2 2 +8 1 2 2 +12 2 0 5 +10 2 1 4 +13 2 1 4 +11 2 2 2 +14 2 2 2 +15 3 0 5 +18 3 0 5 +16 3 1 3 +19 3 1 3 +17 3 2 1 +21 4 0 5 +24 4 0 5 +22 4 1 3 +20 4 2 2 +23 4 2 2 +27 5 0 5 +25 5 1 4 +28 5 1 4 +26 5 2 2 +29 5 2 2 +30 6 0 1 +select + count(*) over (rows between current row and current row), + count(*) over (range between current row and current row) +from numbers(3); +1 3 +1 3 +1 3 +-- RANGE OFFSET +-- a basic RANGE OFFSET frame +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) +window w as (order by x asc range between 1 preceding and 2 following) +order by x; +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 5 4 +4 3 6 4 +5 4 7 4 +6 5 8 4 +7 6 9 4 +8 7 10 4 +9 8 10 3 +10 9 10 2 +-- overflow conditions +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toUInt8(if(mod(number, 2), + toInt64(255 - intDiv(number, 2)), + toInt64(intDiv(number, 2)))) x + from numbers(10) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 4 3 +4 3 4 2 +251 251 253 3 +252 251 254 4 +253 252 255 4 +254 253 255 3 +255 254 255 2 +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toInt8(multiIf( + mod(number, 3) == 0, toInt64(intDiv(number, 3)), + mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), + toInt64(-128 + intDiv(number, 3)))) x + from numbers(15) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; +-128 -128 -126 3 +-127 -128 -125 4 +-126 -127 -124 4 +-125 -126 -124 3 +-124 -125 -124 2 +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 4 3 +4 3 4 2 +123 123 125 3 +124 123 126 4 +125 124 127 4 +126 125 127 3 +127 126 127 2 +-- RANGE OFFSET ORDER BY DESC +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and 2 following) +order by x +settings max_block_size = 1; +0 0 1 2 +1 0 2 3 +2 0 3 4 +3 1 4 4 +4 2 5 4 +5 3 6 4 +6 4 7 4 +7 5 8 4 +8 6 9 4 +9 7 10 4 +10 8 10 3 +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and unbounded following) +order by x +settings max_block_size = 2; +0 0 1 2 +1 0 2 3 +2 0 3 4 +3 0 4 5 +4 0 5 6 +5 0 6 7 +6 0 7 8 +7 0 8 9 +8 0 9 10 +9 0 10 11 +10 0 10 11 +select x, min(x) over w, max(x) over w, count(x) over w 
from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 following) +order by x +settings max_block_size = 3; +0 0 10 11 +1 0 10 11 +2 0 10 11 +3 1 10 10 +4 2 10 9 +5 3 10 8 +6 4 10 7 +7 5 10 6 +8 6 10 5 +9 7 10 4 +10 8 10 3 +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 preceding) +order by x +settings max_block_size = 4; +0 2 10 9 +1 3 10 8 +2 4 10 7 +3 5 10 6 +4 6 10 5 +5 7 10 4 +6 8 10 3 +7 9 10 2 +8 10 10 1 +9 0 0 0 +10 0 0 0 +-- Check that we put windows in such an order that we can reuse the sort. +-- First, check that at least the result is correct when we have many windows +-- with different sort order. +select + number, + count(*) over (partition by p order by number), + count(*) over (partition by p order by number, o), + count(*) over (), + count(*) over (order by number), + count(*) over (order by o), + count(*) over (order by o, number), + count(*) over (order by number, o), + count(*) over (partition by p order by o, number), + count(*) over (partition by p), + count(*) over (partition by p order by o), + count(*) over (partition by p, o order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +order by number +; +0 1 1 16 1 4 1 1 1 3 1 1 +1 2 2 16 2 7 5 2 2 3 2 1 +2 3 3 16 3 10 8 3 3 3 3 1 +3 1 1 16 4 13 11 4 2 3 2 1 +4 2 2 16 5 16 14 5 3 3 3 1 +5 3 3 16 6 4 2 6 1 3 1 1 +6 1 1 16 7 7 6 7 1 3 1 1 +7 2 2 16 8 10 9 8 2 3 2 1 +8 3 3 16 9 13 12 9 3 3 3 1 +9 1 1 16 10 16 15 10 3 3 3 1 +10 2 2 16 11 4 3 11 1 3 1 1 +11 3 3 16 12 7 7 12 2 3 2 1 +12 1 1 16 13 10 10 13 1 3 1 1 +13 2 2 16 14 13 13 14 2 3 2 1 +14 3 3 16 15 16 16 15 3 3 3 1 +15 1 1 16 16 4 4 16 1 1 1 1 +-- The EXPLAIN for the above query would be difficult to understand, so check some +-- simple cases instead. 
+explain select + count(*) over (partition by p), + count(*) over (), + count(*) over (partition by p order by o) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; +Expression ((Projection + Before ORDER BY)) + Window (Window step for window \'\') + Window (Window step for window \'PARTITION BY p\') + Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') + MergingSorted (Merge sorted streams for window \'PARTITION BY p ORDER BY o ASC\') + MergeSorting (Merge sorted blocks for window \'PARTITION BY p ORDER BY o ASC\') + PartialSorting (Sort each block for window \'PARTITION BY p ORDER BY o ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +explain select + count(*) over (order by o, number), + count(*) over (order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; +Expression ((Projection + Before ORDER BY)) + Window (Window step for window \'ORDER BY o ASC, number ASC\') + MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\') + MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\') + PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\') + Window (Window step for window \'ORDER BY number ASC\') + MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\') + MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\') + PartialSorting (Sort each block for window \'ORDER BY number ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +-- A test case for the sort comparator found by fuzzer. 
+SELECT + max(number) OVER (ORDER BY number DESC NULLS FIRST), + max(number) OVER (ORDER BY number ASC NULLS FIRST) +FROM numbers(2) +; +1 0 +1 1 +-- some true window functions -- rank and friends +select number, p, o, + count(*) over w, + rank() over w, + dense_rank() over w, + row_number() over w +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31) order by o, number) t +window w as (partition by p order by o) +order by p, o, number +settings max_block_size = 2; +0 0 0 2 1 1 1 +3 0 0 2 1 1 2 +1 0 1 4 3 2 3 +4 0 1 4 3 2 4 +2 0 2 5 5 3 5 +6 1 0 2 1 1 1 +9 1 0 2 1 1 2 +7 1 1 3 3 2 3 +5 1 2 5 4 3 4 +8 1 2 5 4 3 5 +12 2 0 1 1 1 1 +10 2 1 3 2 2 2 +13 2 1 3 2 2 3 +11 2 2 5 4 3 4 +14 2 2 5 4 3 5 +15 3 0 2 1 1 2 +18 3 0 2 1 1 1 +16 3 1 4 3 2 3 +19 3 1 4 3 2 4 +17 3 2 5 5 3 5 +21 4 0 2 1 1 1 +24 4 0 2 1 1 2 +22 4 1 3 3 2 3 +20 4 2 5 4 3 5 +23 4 2 5 4 3 4 +27 5 0 1 1 1 1 +25 5 1 3 2 2 2 +28 5 1 3 2 2 3 +26 5 2 5 4 3 4 +29 5 2 5 4 3 5 +30 6 0 1 1 1 1 +-- our replacement for lag/lead +select + anyOrNull(number) + over (order by number rows between 1 preceding and 1 preceding), + anyOrNull(number) + over (order by number rows between 1 following and 1 following) +from numbers(5); +\N 1 +0 2 +1 3 +2 4 +3 \N +-- case-insensitive SQL-standard synonyms for any and anyLast +select + number, + fIrSt_VaLue(number) over w, + lAsT_vAlUe(number) over w +from numbers(10) +window w as (order by number range between 1 preceding and 1 following) +order by number +; +0 0 1 +1 0 2 +2 1 3 +3 2 4 +4 3 5 +5 4 6 +6 5 7 +7 6 8 +8 7 9 +9 8 9 diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 95afb9be408..03bd8371e23 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -3,77 +3,78 @@ set allow_experimental_window_functions = 1; -- just something basic -select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); +select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); -- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; -- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } +select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } -- no partition by -select number, avg(number) over (order by number) from numbers(10); +select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); -- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); -- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- can't reference it yet -- the window functions are calculated at the -- last stage of select, after all other functions. 
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } +select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } -- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) where c > 0; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; -- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; -- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) order by c; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; -- Example with window function only in ORDER BY. Here we make a rank of all -- numbers sorted descending, and then sort by this rank descending, and must get -- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc; +select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; -- Aggregate functions as window function arguments. This query is semantically -- the same as the above one, only we replace `number` with -- `any(number) group by number` and so on. -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; -- some more simple cases w/aggregate functions -select sum(any(number)) over () from numbers(1); -select sum(any(number) + 1) over () from numbers(1); -select sum(any(number + 1)) over () from numbers(1); +select sum(any(number)) over (rows unbounded preceding) from numbers(1); +select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); +select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); -- different windows -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; -- two functions over the same window -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by 
intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; -- check that we can work with constant columns select median(x) over (partition by x) from (select 1 x); -- an empty window definition is valid as well +select groupArray(number) over (rows unbounded preceding) from numbers(3); select groupArray(number) over () from numbers(3); -- This one tests we properly process the window function arguments. -- Seen errors like 'column `1` not found' from count(1). -select count(1) over (), max(number + 1) over () from numbers(3); +select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); -- Should work in DISTINCT -select distinct sum(0) over () from numbers(2); -select distinct any(number) over () from numbers(2); +select distinct sum(0) over (rows unbounded preceding) from numbers(2); +select distinct any(number) over (rows unbounded preceding) from numbers(2); -- Various kinds of aliases are properly substituted into various parts of window -- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7); +with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); -- WINDOW clause select 1 window w1 as (); @@ -81,14 +82,266 @@ select 1 window w1 as (); select sum(number) over w1, sum(number) over w2 from numbers(10) window - w1 as (), - w2 as (partition by intDiv(number, 3)) + w1 as (rows unbounded preceding), + w2 as (partition by intDiv(number, 3) rows unbounded preceding) ; +-- FIXME both functions should use the same window, but they don't. Add an +-- EXPLAIN test for this. select sum(number) over w1, - sum(number) over (partition by intDiv(number, 3)) + sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) window - w1 as (partition by intDiv(number, 3)) + w1 as (partition by intDiv(number, 3) rows unbounded preceding) +; + +-- RANGE frame +-- It's the default +select sum(number) over () from numbers(3); + +-- Try some mutually prime sizes of partition, group and block, for the number +-- of rows that is their least common multiple + 1, so that we see all the +-- interesting corner cases. 
+select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; + +select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; + +select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; + +select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; + +select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; + +select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; + +-- A case where the partition end is in the current block, and the frame end +-- is triggered by the partition end. +select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); + +-- UNBOUNDED FOLLOWING frame end +select + min(number) over wa, min(number) over wo, + max(number) over wa, max(number) over wo +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(31)) +window + wa as (partition by p order by o + range between unbounded preceding and unbounded following), + wo as (partition by p order by o + rows between unbounded preceding and unbounded following) +settings max_block_size = 2; + +-- ROWS offset frame start +select number, p, + count(*) over (partition by p order by number + rows between 1 preceding and unbounded following), + count(*) over (partition by p order by number + rows between current row and unbounded following), + count(*) over (partition by p order by number + rows between 1 following and unbounded following) +from (select number, intDiv(number, 5) p from numbers(31)) +order by p, number +settings max_block_size = 2; + +-- ROWS offset frame start and end +select number, p, + count(*) over (partition by p order by number + rows between 2 preceding and 2 following) +from (select number, intDiv(number, 7) p from numbers(71)) +order by p, number +settings max_block_size = 2; + +SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); + +-- frame boundaries that runs into the partition end +select + count() over (partition by intDiv(number, 3) + rows between 100 following and unbounded following), + count() over (partition by intDiv(number, 3) + rows between current row and 100 following) +from numbers(10); + +-- seen a use-after-free under MSan in this query once +SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; + +-- a corner case +select count() over (); + +-- RANGE CURRENT ROW frame start +select number, p, o, + count(*) over (partition by p order by o + range between current row and unbounded following) +from (select number, intDiv(number, 5) p, 
mod(number, 3) o + from numbers(31)) +order by p, o, number +settings max_block_size = 2; + +select + count(*) over (rows between current row and current row), + count(*) over (range between current row and current row) +from numbers(3); + +-- RANGE OFFSET +-- a basic RANGE OFFSET frame +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) +window w as (order by x asc range between 1 preceding and 2 following) +order by x; + +-- overflow conditions +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toUInt8(if(mod(number, 2), + toInt64(255 - intDiv(number, 2)), + toInt64(intDiv(number, 2)))) x + from numbers(10) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; + +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toInt8(multiIf( + mod(number, 3) == 0, toInt64(intDiv(number, 3)), + mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), + toInt64(-128 + intDiv(number, 3)))) x + from numbers(15) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; + +-- RANGE OFFSET ORDER BY DESC +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and 2 following) +order by x +settings max_block_size = 1; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and unbounded following) +order by x +settings max_block_size = 2; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 following) +order by x +settings max_block_size = 3; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 preceding) +order by x +settings max_block_size = 4; + + +-- Check that we put windows in such an order that we can reuse the sort. +-- First, check that at least the result is correct when we have many windows +-- with different sort order. +select + number, + count(*) over (partition by p order by number), + count(*) over (partition by p order by number, o), + count(*) over (), + count(*) over (order by number), + count(*) over (order by o), + count(*) over (order by o, number), + count(*) over (order by number, o), + count(*) over (partition by p order by o, number), + count(*) over (partition by p), + count(*) over (partition by p order by o), + count(*) over (partition by p, o order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +order by number +; + +-- The EXPLAIN for the above query would be difficult to understand, so check some +-- simple cases instead. +explain select + count(*) over (partition by p), + count(*) over (), + count(*) over (partition by p order by o) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; + +explain select + count(*) over (order by o, number), + count(*) over (order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; + +-- A test case for the sort comparator found by fuzzer. 
+SELECT + max(number) OVER (ORDER BY number DESC NULLS FIRST), + max(number) OVER (ORDER BY number ASC NULLS FIRST) +FROM numbers(2) +; + +-- some true window functions -- rank and friends +select number, p, o, + count(*) over w, + rank() over w, + dense_rank() over w, + row_number() over w +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31) order by o, number) t +window w as (partition by p order by o) +order by p, o, number +settings max_block_size = 2; + +-- our replacement for lag/lead +select + anyOrNull(number) + over (order by number rows between 1 preceding and 1 preceding), + anyOrNull(number) + over (order by number rows between 1 following and 1 following) +from numbers(5); + +-- case-insensitive SQL-standard synonyms for any and anyLast +select + number, + fIrSt_VaLue(number) over w, + lAsT_vAlUe(number) over w +from numbers(10) +window w as (order by number range between 1 preceding and 1 following) +order by number ; diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh index 7f111538a06..6ae103bdf6e 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh @@ -28,7 +28,7 @@ function kill_mutation_thread # find any mutation and kill it mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done=0 and database='${CLICKHOUSE_DATABASE}' and table='concurrent_mutate_kill' LIMIT 1") if [ ! -z "$mutation_id" ]; then - $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null + $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table='concurrent_mutate_kill' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null sleep 1 fi done diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh index 60e2adb4204..bfa68328c06 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh @@ -40,7 +40,7 @@ function kill_mutation_thread # find any mutation and kill it mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done = 0 and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}' LIMIT 1") if [ ! 
-z "$mutation_id" ]; then - $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null + $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null sleep 1 fi done diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index b33b74c918d..5de4210d3f2 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -10,8 +10,8 @@ set max_block_size=40960; -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0) -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 format Null; -- { serverError 241 } -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94) -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption @@ -26,4 +26,4 @@ select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v -- MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 809600 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null; +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null; diff --git a/tests/queries/0_stateless/01602_runningConcurrency.reference b/tests/queries/0_stateless/01602_runningConcurrency.reference new file mode 100644 index 00000000000..1bd238ccde8 --- /dev/null +++ b/tests/queries/0_stateless/01602_runningConcurrency.reference @@ -0,0 +1,19 @@ +Invocation with Date columns +1 +2 +3 +2 +1 +Invocation with DateTime +1 +2 +3 +2 +1 +Invocation with DateTime64 +1 +2 +3 +2 +1 +Erroneous cases diff --git a/tests/queries/0_stateless/01602_runningConcurrency.sql b/tests/queries/0_stateless/01602_runningConcurrency.sql new file mode 100644 index 00000000000..55b3aae867a --- /dev/null +++ b/tests/queries/0_stateless/01602_runningConcurrency.sql @@ -0,0 +1,51 @@ +-- +SELECT 'Invocation with Date columns'; + +DROP TABLE IF EXISTS 
runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin Date, end Date) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01', '2020-12-10'), ('2020-12-02', '2020-12-10'), ('2020-12-03', '2020-12-12'), ('2020-12-10', '2020-12-12'), ('2020-12-13', '2020-12-20'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Invocation with DateTime'; + +DROP TABLE IF EXISTS runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin DateTime, end DateTime) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00', '2020-12-01 00:59:59'), ('2020-12-01 00:30:00', '2020-12-01 00:59:59'), ('2020-12-01 00:40:00', '2020-12-01 01:30:30'), ('2020-12-01 01:10:00', '2020-12-01 01:30:30'), ('2020-12-01 01:50:00', '2020-12-01 01:59:59'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Invocation with DateTime64'; + +DROP TABLE IF EXISTS runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin DateTime64(3), end DateTime64(3)) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00.000', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.010', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.020', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.150', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.250', '2020-12-01 00:00:00.300'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Erroneous cases'; + +-- Constant columns are currently not supported. +SELECT runningConcurrency(toDate(arrayJoin([1, 2])), toDate('2000-01-01')); -- { serverError 44 } + +-- Unsupported data types +SELECT runningConcurrency('strings are', 'not supported'); -- { serverError 43 } +SELECT runningConcurrency(NULL, NULL); -- { serverError 43 } +SELECT runningConcurrency(CAST(NULL, 'Nullable(DateTime)'), CAST(NULL, 'Nullable(DateTime)')); -- { serverError 43 } + +-- Mismatching data types +SELECT runningConcurrency(toDate('2000-01-01'), toDateTime('2000-01-01 00:00:00')); -- { serverError 43 } + +-- begin > end +SELECT runningConcurrency(toDate('2000-01-02'), toDate('2000-01-01')); -- { serverError 117 } + + diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 16a0b92abe7..6d425c9bceb 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -6,6 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Clone some not too large repository and create a database from it. 
+cd $CLICKHOUSE_TMP || exit + # Protection for network errors for _ in {1..10}; do rm -rf ./clickhouse-odbc diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference index ac91b53b754..e46fd479413 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference @@ -5,5 +5,7 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO 1,10 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT - identity(cast(0, \'UInt64\')) AS n, + identity(CAST(0, \'UInt64\')) AS n, toUInt64(10 / n) +SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); +0 diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql index abf67a8ed6a..59f057d1ec5 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql @@ -2,3 +2,4 @@ SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n)) FORMAT CSV; SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV; EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); +SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); diff --git a/tests/queries/0_stateless/01621_clickhouse_compressor.sh b/tests/queries/0_stateless/01621_clickhouse_compressor.sh index 5292bcef52a..3157cb0e887 100755 --- a/tests/queries/0_stateless/01621_clickhouse_compressor.sh +++ b/tests/queries/0_stateless/01621_clickhouse_compressor.sh @@ -7,27 +7,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -TEMP_DIR="$(mktemp -d /tmp/clickhouse.test..XXXXXX)" -cd "${TEMP_DIR:?}" - -function cleanup() -{ - rm -fr "${TEMP_DIR:?}" -} -trap cleanup EXIT - # This is random garbage, so compression ratio will be very low. 
-tr -cd 'a-z0-9' < /dev/urandom | head -c1M > input +tr -cd 'a-z0-9' < /dev/urandom | head -c1M > ${CLICKHOUSE_TMP}/input # stdin/stdout streams -$CLICKHOUSE_COMPRESSOR < input > output -diff -q <($CLICKHOUSE_COMPRESSOR --decompress < output) input +$CLICKHOUSE_COMPRESSOR < ${CLICKHOUSE_TMP}/input > ${CLICKHOUSE_TMP}/output +diff -q <($CLICKHOUSE_COMPRESSOR --decompress < ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input # positional arguments, and that fact that input/output will be overwritten -$CLICKHOUSE_COMPRESSOR input output -diff -q <($CLICKHOUSE_COMPRESSOR --decompress output) input +$CLICKHOUSE_COMPRESSOR ${CLICKHOUSE_TMP}/input ${CLICKHOUSE_TMP}/output +diff -q <($CLICKHOUSE_COMPRESSOR --decompress ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input # --offset-in-decompressed-block -diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 output) <(tail -c+$((10+1)) input) +diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 ${CLICKHOUSE_TMP}/output) <(tail -c+$((10+1)) ${CLICKHOUSE_TMP}/input) # TODO: --offset-in-compressed-file using some .bin file (via clickhouse-local + check-marks) diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index e7deace8b46..7afdbbc6b66 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -7,12 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) PORT="$(($RANDOM%63000+2001))" -TEMP_FILE="$CURDIR/01622_defaults_for_url_engine.tmp" +TEMP_FILE="${CLICKHOUSE_TMP}/01622_defaults_for_url_engine.tmp" function thread1 { - while true; do - echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; + while true; do + echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; done } diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference new file mode 100644 index 00000000000..d9ebb694584 --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.reference @@ -0,0 +1,22 @@ +TTL WHERE +1970-10-10 2 +1970-10-10 5 +1970-10-10 8 +2000-10-10 1 +2000-10-10 2 +2000-10-10 4 +2000-10-10 5 +2000-10-10 7 +2000-10-10 8 +TTL GROUP BY +1970-10-01 0 4950 +2000-10-01 0 450 +2000-10-01 1 460 +2000-10-01 2 470 +2000-10-01 3 480 +2000-10-01 4 490 +2000-10-01 5 500 +2000-10-01 6 510 +2000-10-01 7 520 +2000-10-01 8 530 +2000-10-01 9 540 diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql new file mode 100644 index 00000000000..aa2eeb5759b --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.sql @@ -0,0 +1,44 @@ +SELECT 'TTL WHERE'; +DROP TABLE IF EXISTS ttl_where; + +CREATE TABLE ttl_where +( + `d` Date, + `i` UInt32 +) +ENGINE = MergeTree +ORDER BY tuple() +TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0, + d + toIntervalYear(40) DELETE WHERE i % 3 = 1; + +-- This test will fail at 2040-10-10 + +INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10); +INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10); +OPTIMIZE TABLE ttl_where FINAL; + +SELECT * FROM ttl_where ORDER BY d, i; + +DROP TABLE ttl_where; + +SELECT 'TTL GROUP BY'; +DROP TABLE IF EXISTS ttl_group_by; + +CREATE TABLE ttl_group_by +( + `d` Date, + `i` UInt32, + `v` UInt64 +) +ENGINE = MergeTree +ORDER BY (toStartOfMonth(d), i % 10) +TTL d + toIntervalYear(10) GROUP BY toStartOfMonth(d), i % 10 SET 
d = any(toStartOfMonth(d)), i = any(i % 10), v = sum(v), + d + toIntervalYear(40) GROUP BY toStartOfMonth(d) SET d = any(toStartOfMonth(d)), v = sum(v); + +INSERT INTO ttl_group_by SELECT toDate('2000-10-10'), number, number FROM numbers(100); +INSERT INTO ttl_group_by SELECT toDate('1970-10-10'), number, number FROM numbers(100); +OPTIMIZE TABLE ttl_group_by FINAL; + +SELECT * FROM ttl_group_by ORDER BY d, i; + +DROP TABLE ttl_group_by; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql similarity index 96% rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql rename to tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql index f059da20755..7a92f40b3f0 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql @@ -5,7 +5,7 @@ create table data_01641 (key Int, value String) engine=MergeTree order by (key, -- peak memory usage is 170MiB set max_memory_usage='200Mi'; system stop merges data_01641; -insert into data_01641 select number, toString(number) from numbers(toUInt64(120e6)); +insert into data_01641 select number, toString(number) from numbers(120e6); -- peak: -- - is 21MiB if background merges already scheduled diff --git a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql index 50596680618..c3e459dfc49 100644 --- a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql @@ -5,7 +5,7 @@ CREATE TABLE partitioned_table ( partitioner UInt8, value String ) -ENGINE ReplicatedMergeTree('/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table', '1') +ENGINE ReplicatedMergeTree('/clickhouse/01650_drop_part_and_deduplication_partitioned_table', '1') ORDER BY key PARTITION BY partitioner; @@ -16,24 +16,24 @@ INSERT INTO partitioned_table VALUES (11, 1, 'AA'), (22, 2, 'BB'), (33, 3, 'CC') SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- must be deduplicated SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; ALTER TABLE partitioned_table DROP PART '3_1_1_0'; SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY 
name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- mustn't be deduplicated SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; DROP TABLE IF EXISTS partitioned_table; diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql index b3d3ad81834..d664ec606b5 100644 --- a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql +++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql @@ -4,3 +4,5 @@ SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_nu SELECT ignore(toLowCardinality(1), toLowCardinality(2), 3); + +DROP TABLE lc_null_int8_defnull; diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference new file mode 100644 index 00000000000..19487c9f942 --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -0,0 +1,140 @@ +---------Q1---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE table2.b = toUInt32(20) +---------Q2---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.a < table2.b) AND (table2.b = toUInt32(20)) +---------Q3---------- +---------Q4---------- +6 40 +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = toUInt32(10 - table2.a) +WHERE (b = 6) AND (table2.b > 20) +---------Q5---------- +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 + WHERE 0 +) AS table2 ON a = table2.a +WHERE 0 +---------Q6---------- +---------Q7---------- +0 0 0 0 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.b < toUInt32(40)) AND (b < 1) +---------Q8---------- +---------Q9---will not be optimized---------- +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL LEFT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL RIGHT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (table2.b = toUInt32(10)) +WHERE a < toUInt32(20) +SELECT + a, + 
b, + table2.a, + table2.b +FROM table1 +CROSS JOIN table2 diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql new file mode 100644 index 00000000000..23871a9c47c --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -0,0 +1,48 @@ +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; + +CREATE TABLE table1 (a UInt32, b UInt32) ENGINE = Memory; +CREATE TABLE table2 (a UInt32, b UInt32) ENGINE = Memory; + +INSERT INTO table1 SELECT number, number FROM numbers(10); +INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(6); + +SELECT '---------Q1----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); + +SELECT '---------Q2----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); + +SELECT '---------Q3----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = toUInt32(table2.a + 5)) AND (table2.a < table1.b) AND (table2.b > toUInt32(20)); -- { serverError 48 } + +SELECT '---------Q4----------'; +SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); + +SELECT '---------Q5----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); + +SELECT '---------Q6----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } + +SELECT '---------Q7----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10; + +SELECT '---------Q8----------'; +SELECT * FROM table1 INNER JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(table1, 10)); -- { serverError 47 } + +SELECT '---------Q9---will not be optimized----------'; +EXPLAIN SYNTAX SELECT * FROM table1 LEFT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 RIGHT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(10)) WHERE table1.a < toUInt32(20); +EXPLAIN SYNTAX SELECT * FROM table1 , table2; + +DROP TABLE table1; +DROP TABLE table2; diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference index e12ee221a7b..c80d31a343a 100644 --- 
a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference @@ -11,7 +11,7 @@ arraySort(used_table_functions) ['numbers'] arraySort(used_functions) -['addDays','array','arrayFlatten','cast','crc32','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek'] +['CAST', 'addDays','array','arrayFlatten','crc32','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek'] arraySort(used_data_type_families) ['Array','Int32','Nullable','String'] diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql index b584f2c38c8..3a890ce16f9 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql @@ -1,3 +1,5 @@ +SET database_atomic_wait_for_drop_and_detach_synchronously=1; + SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), POW(1, 2), ROUND(TANh(1)), CrC32(''), @@ -58,3 +60,5 @@ WHERE current_database = currentDatabase() AND type == 'QueryFinish' AND (query ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; +DROP TABLE test_query_log_factories_info1.memory_table; +DROP DATABASE test_query_log_factories_info1; diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference new file mode 100644 index 00000000000..87659c32e39 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -0,0 +1,25 @@ +aaaaaaaaa bbbbbbbbb +:0 +:0 +:0 +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +:0 +aaaaaaaaa +bbbbbbbbb +ccccccccc +:107 +:79 +:35 +:35 +:35 +699415 +aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +699415 0 +:0 +:107 +:79 diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh new file mode 100755 index 00000000000..593f0e59ea7 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +set -eu + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Data preparation. +# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p ${user_files_path}/ +echo -n aaaaaaaaa > ${user_files_path}/a.txt +echo -n bbbbbbbbb > ${user_files_path}/b.txt +echo -n ccccccccc > ${user_files_path}/c.txt +echo -n ccccccccc > /tmp/c.txt +mkdir -p ${user_files_path}/dir + + +### 1st TEST in CLIENT mode. +${CLICKHOUSE_CLIENT} --query "drop table if exists data;" +${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;" + + +# Valid cases: +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? 
+${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$? +${CLICKHOUSE_CLIENT} --multiquery --query " + create table filenames(name String) engine=MergeTree() order by tuple(); + insert into filenames values ('a.txt'), ('b.txt'), ('c.txt'); + select file(name) from filenames format TSV; + drop table if exists filenames; +" + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null +# Test isDir +echo "clickhouse-client --query "'"select file('"'${user_files_path}/dir'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null +# Test path out of the user_files directory. It's not allowed in client mode +echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null + +# Test relative path consists of ".." whose absolute path is out of the user_files directory. +echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null + + +### 2nd TEST in LOCAL mode. + +echo -n aaaaaaaaa > a.txt +echo -n bbbbbbbbb > b.txt +echo -n ccccccccc > c.txt +mkdir -p dir +#Test for large files, with length : 699415 +c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}') +echo $c_count + +# Valid cases: +# The default dir is the CWD path in LOCAL mode +${CLICKHOUSE_LOCAL} --query " + drop table if exists data; + create table data (A String, B String) engine=MergeTree() order by A; + select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + select file('c.txt'), * from data; + select file('/tmp/c.txt'), * from data; + select $c_count, $c_count -length(file('${CURDIR}/01518_nullable_aggregate_states2.reference')) +" +echo ":"$? + + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-local --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + +# Test isDir +echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' 
| bash 2>/dev/null + +# Restore +rm -rf a.txt b.txt c.txt dir +rm -rf ${user_files_path}/a.txt +rm -rf ${user_files_path}/b.txt +rm -rf ${user_files_path}/c.txt +rm -rf /tmp/c.txt +rm -rf ${user_files_path}/dir diff --git a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql index ca324acdce3..e233f1e6169 100644 --- a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql +++ b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql @@ -4,3 +4,5 @@ INSERT INTO data_01655 VALUES (1); SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=0, merge_tree_min_bytes_for_concurrent_read=0; -- UINT64_MAX SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=18446744073709551615, merge_tree_min_bytes_for_concurrent_read=18446744073709551615; + +DROP TABLE data_01655; diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.reference b/tests/queries/0_stateless/01666_gcd_ubsan.reference index 2500ef1deae..af041bdcbde 100644 --- a/tests/queries/0_stateless/01666_gcd_ubsan.reference +++ b/tests/queries/0_stateless/01666_gcd_ubsan.reference @@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT gcd(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.sql b/tests/queries/0_stateless/01666_gcd_ubsan.sql index bde2b624cc0..a1f501cda0b 100644 --- a/tests/queries/0_stateless/01666_gcd_ubsan.sql +++ b/tests/queries/0_stateless/01666_gcd_ubsan.sql @@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT gcd(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.reference b/tests/queries/0_stateless/01666_lcm_ubsan.reference index ed9a6aed42b..d8ed13191d3 100644 --- 
a/tests/queries/0_stateless/01666_lcm_ubsan.reference +++ b/tests/queries/0_stateless/01666_lcm_ubsan.reference @@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT lcm(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.sql b/tests/queries/0_stateless/01666_lcm_ubsan.sql index 5cc3546e941..b3b869c80ed 100644 --- a/tests/queries/0_stateless/01666_lcm_ubsan.sql +++ b/tests/queries/0_stateless/01666_lcm_ubsan.sql @@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT lcm(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference new file mode 100644 index 00000000000..a08a20dc95d --- /dev/null +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference @@ -0,0 +1,15 @@ +Spin up a long running query +Check if another query with some marks to read is throttled +yes +Check if another query with less marks to read is passed +0 100 +Modify min_marks_to_honor_max_concurrent_queries to 1 +Check if another query with less marks to read is throttled +yes +Modify max_concurrent_queries to 2 +Check if another query is passed +0 100 +Modify max_concurrent_queries back to 1 +Check if another query with less marks to read is throttled +yes +finished long_running_query default select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh new file mode 100755 index 00000000000..e32a83c9560 --- /dev/null +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function wait_for_query_to_start() +{ + while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT sum(read_rows) FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done +} + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +drop table if exists simple; + +create table simple (i int, j int) engine = MergeTree order by i +settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_max_concurrent_queries = 2; + +insert into simple select number, number + 100 from numbers(1000); +" + +echo "Spin up a long running query" +${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" > /dev/null 2>&1 & +wait_for_query_to_start 'long_running_query' + +# query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled +echo "Check if another query with some marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple" 2> /dev/null; +CODE=$? +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +# query which reads marks less than min_marks_to_honor_max_concurrent_queries is allowed +echo "Check if another query with less marks to read is passed" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" + +# We can modify the settings to take effect for future queries +echo "Modify min_marks_to_honor_max_concurrent_queries to 1" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting min_marks_to_honor_max_concurrent_queries = 1" + +# Now smaller queries are also throttled +echo "Check if another query with less marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +CODE=$? +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +echo "Modify max_concurrent_queries to 2" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 2" + +# Now more queries are accepted +echo "Check if another query is passed" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" + +echo "Modify max_concurrent_queries back to 1" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 1" + +# Now queries are throttled again +echo "Check if another query with less marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +CODE=$? 
+[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = 'long_running_query' SYNC" +wait + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +drop table simple +" diff --git a/tests/queries/0_stateless/01669_columns_declaration_serde.sql b/tests/queries/0_stateless/01669_columns_declaration_serde.sql index 8e3354d63cd..a6bf1184e9f 100644 --- a/tests/queries/0_stateless/01669_columns_declaration_serde.sql +++ b/tests/queries/0_stateless/01669_columns_declaration_serde.sql @@ -22,12 +22,12 @@ DROP TABLE IF EXISTS test_r1; DROP TABLE IF EXISTS test_r2; CREATE TABLE test_r1 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r1') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r1') ORDER BY "\\"; INSERT INTO test_r1 ("\\") VALUES ('\\'); CREATE TABLE test_r2 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r2') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r2') ORDER BY "\\"; SYSTEM SYNC REPLICA test_r2; diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.reference b/tests/queries/0_stateless/01671_ddl_hang_timeout.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh new file mode 100755 index 00000000000..2ca97e3978b --- /dev/null +++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function thread_create_drop_table { + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x" 2>/dev/null + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1" + done +} + +function thread_alter_table { + while true; do + $CLICKHOUSE_CLIENT --query "ALTER TABLE $CLICKHOUSE_DATABASE.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32" >/dev/null 2>&1 + sleep 0.0$RANDOM + done +} + +export -f thread_create_drop_table +export -f thread_alter_table +timeout 20 bash -c "thread_create_drop_table" & +timeout 20 bash -c 'thread_alter_table' & +wait +sleep 1 + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1"; diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.reference b/tests/queries/0_stateless/01671_merge_join_and_constants.reference index 114fc9ff91f..efd814df893 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.reference +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.reference @@ -3,3 +3,4 @@ │ b │ 2018-01-01 │ B │ 2018-01-01 │ 0.10 │ │ c │ 2018-01-01 │ C │ 2018-01-01 │ 0.10 │ └───┴────────────┴───┴────────────┴───────────┘ +\N \N \N \N 0 0 diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql index 8611a96a723..c34f8e6705d 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql @@ -11,5 +11,7 @@ set join_algorithm = 'partial_merge'; SELECT * FROM table1 AS t1 ALL LEFT JOIN (SELECT *, '0.10', c, d AS b FROM table2) AS t2 
USING (a, b) ORDER BY d ASC FORMAT PrettyCompact settings max_rows_in_join = 1; +SELECT pow('0.0000000257', NULL), pow(pow(NULL, NULL), NULL) - NULL, (val + NULL) = (rval * 0), * FROM (SELECT (val + 256) = (NULL * NULL), toLowCardinality(toNullable(dummy)) AS val FROM system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 0) = (rval * 255) settings max_rows_in_join = 1; + DROP TABLE IF EXISTS table1; DROP TABLE IF EXISTS table2; diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh new file mode 100755 index 00000000000..ee75f675eb3 --- /dev/null +++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --param_paramName="test" -q "WITH subquery AS (SELECT {paramName:String}) SELECT * FROM subquery" diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference new file mode 100644 index 00000000000..0777c3c2625 --- /dev/null +++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference @@ -0,0 +1,4 @@ +Value +Value +Value +Value diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql new file mode 100644 index 00000000000..c98cb0a5e0f --- /dev/null +++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql @@ -0,0 +1,5 @@ +SELECT dictGet('simple_executable_cache_dictionary_no_implicit_key', 'value', toUInt64(1)); +SELECT dictGet('simple_executable_cache_dictionary_implicit_key', 'value', toUInt64(1)); + +SELECT dictGet('complex_executable_cache_dictionary_no_implicit_key', 'value', (toUInt64(1), 'FirstKey')); +SELECT dictGet('complex_executable_cache_dictionary_implicit_key', 'value', (toUInt64(1), 'FirstKey')); diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference new file mode 100644 index 00000000000..6b522898280 --- /dev/null +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -0,0 +1,8 @@ +0 +0 +255 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 1 +3 ['foo','bar'] 3 1 +4 ['foo','bar'] 4 1 +5 ['foo','bar'] 5 1 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql new file mode 100644 index 00000000000..960153d9c5a --- /dev/null +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -0,0 +1,14 @@ +-- ORDER BY is to trigger comparison at uninitialized memory after bad filtering. 
+SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize([257])) LIMIT 1; +SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize(['257'])) LIMIT 1; + +SELECT count() FROM numbers(256) WHERE toUInt8(number); + +DROP TABLE IF EXISTS t_filter; +CREATE TABLE t_filter(s String, a Array(FixedString(3)), u UInt64, f UInt8) +ENGINE = MergeTree ORDER BY u; + +INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); +SELECT * FROM t_filter WHERE f LIMIT 5; + +DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference new file mode 100644 index 00000000000..63b3707b9b4 --- /dev/null +++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference @@ -0,0 +1,9 @@ + + +Here is CDTATA. +This is a white space test. +This is a complex test. world '); +SELECT htmlOrXmlCoarseParse(''); +SELECT htmlOrXmlCoarseParse('This is a white space test.'); +SELECT htmlOrXmlCoarseParse('This is a complex test. Hello, world ]]>world ]]> hello\n]]>hello\n'); +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + stringColumn String +) ENGINE = Memory(); + +INSERT INTO defaults values ('hello, world'), (''), (''), ('white space collapse'); + +SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults; +DROP table defaults; diff --git a/tests/queries/0_stateless/01674_unicode_asan.reference b/tests/queries/0_stateless/01674_unicode_asan.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01674_unicode_asan.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01674_unicode_asan.sql b/tests/queries/0_stateless/01674_unicode_asan.sql new file mode 100644 index 00000000000..2ba34b46f93 --- /dev/null +++ b/tests/queries/0_stateless/01674_unicode_asan.sql @@ -0,0 +1,3 @@ +SELECT positionCaseInsensitiveUTF8('иголка.ру', 'иголка.р\0') AS res; +SELECT positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 100)) FROM system.numbers; -- { serverError 2 } +SELECT sum(ignore(positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 2)))) FROM numbers(1000000); diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference b/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql new file mode 100644 index 00000000000..d6eef000b36 --- /dev/null +++ b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql @@ -0,0 +1,5 @@ +drop table if exists tab; +create table tab (x UInt64, `arr.a` Array(UInt64), `arr.b` Array(UInt64)) engine = MergeTree order by x; +select x from tab array join arr prewhere x != 0 where arr; -- { serverError 47; } +select x from tab array join arr prewhere arr where x != 0; -- { serverError 47; } +drop table if exists tab; diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh new file mode 100755 index 
00000000000..781e43e4134 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +I=0 +while true +do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; +done + +#echo "I = ${I}" +echo 'Ok' diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh new file mode 100755 index 00000000000..08e07044841 --- /dev/null +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function test_completion_word() +{ + local w=$1 && shift + + local w_len=${#w} + local compword_begin=${w:0:$((w_len-3))} + local compword_end=${w:$((w_len-3))} + + # NOTE: here and below you should escape variables of the expect. + timeout 22s expect << EOF +log_user 0 +set timeout 3 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +spawn bash -c "$CLICKHOUSE_CLIENT_BINARY $CLICKHOUSE_CLIENT_OPT" +expect ":) " + +# Make a query +send -- "SET $compword_begin" +expect "SET $compword_begin" + +# Wait for suggestions to load, they are loaded in background +set is_done 0 +while {\$is_done == 0} { + send -- "\\t" + expect { + "$compword_begin$compword_end" { + set is_done 1 + } + default { + sleep 1 + } + } +} + +send -- "\\3\\4" +expect eof +EOF +} + +# last 3 bytes will be completed, +# so take this in mind when you will update the list. +compwords_positive=( + # system.functions + concatAssumeInjective + # system.table_engines + ReplacingMergeTree + # system.formats + JSONEachRow + # system.table_functions + clusterAllReplicas + # system.data_type_families + SimpleAggregateFunction + # system.merge_tree_settings + write_ahead_log_interval_ms_to_fsync + # system.settings + max_concurrent_queries_for_all_users + # system.clusters + test_shard_localhost + # system.errors, also it is very rare to cover system_events_show_zero_values + CONDITIONAL_TREE_PARENT_NOT_FOUND + # system.events, also it is very rare to cover system_events_show_zero_values + WriteBufferFromFileDescriptorWriteFailed + # system.asynchronous_metrics, also this metric has zero value + # + # NOTE: that there is no ability to complete metrics like + # jemalloc.background_thread.num_runs, due to "." 
is used as a word breaker + # (and this cannot be changed -- db.table) + ReplicasMaxAbsoluteDelay + # system.metrics + PartsPreCommitted + # system.macros + default_path_test + # system.storage_policies, egh not uniq + default + # system.aggregate_function_combinators + uniqCombined64ForEach + + # FIXME: one may add separate case for suggestion_limit + # system.databases + system + # system.tables + aggregate_function_combinators + # system.columns + primary_key_bytes_in_memory_allocated + # system.dictionaries + # FIXME: none +) +for w in "${compwords_positive[@]}"; do + test_completion_word "$w" || echo "[FAIL] $w (positive)" +done + +# One negative is enough +compwords_negative=( + # system.clusters + test_shard_localhost_no_such_cluster +) +for w in "${compwords_negative[@]}"; do + test_completion_word "$w" && echo "[FAIL] $w (negative)" +done + +exit 0 diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.reference b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference new file mode 100644 index 00000000000..608f9904d26 --- /dev/null +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference @@ -0,0 +1,11 @@ +2 20 +3 15 +status: +LOADED +status_after_detach_and_attach: +NOT_LOADED +2 20 +3 15 +4 40 +status: +LOADED diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql new file mode 100644 index 00000000000..cd4feaf5709 --- /dev/null +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql @@ -0,0 +1,31 @@ +DROP DATABASE IF EXISTS test_01676 SYNC; + +CREATE DATABASE test_01676; + +CREATE TABLE test_01676.dict_data (key UInt64, value UInt64) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO test_01676.dict_data VALUES (2,20), (3,30), (4,40), (5,50); + +CREATE DICTIONARY test_01676.dict (key UInt64, value UInt64) PRIMARY KEY key SOURCE(CLICKHOUSE(DB 'test_01676' TABLE 'dict_data' HOST '127.0.0.1' PORT tcpPort())) LIFETIME(0) LAYOUT(HASHED()); + +CREATE TABLE test_01676.table (x UInt64, y UInt64 DEFAULT dictGet('test_01676.dict', 'value', x)) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO test_01676.table (x) VALUES (2); +INSERT INTO test_01676.table VALUES (toUInt64(3), toUInt64(15)); + +SELECT * FROM test_01676.table ORDER BY x; + +SELECT 'status:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +DETACH DATABASE test_01676; +ATTACH DATABASE test_01676; + +SELECT 'status_after_detach_and_attach:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +INSERT INTO test_01676.table (x) VALUES (toInt64(4)); +SELECT * FROM test_01676.table ORDER BY x; + +SELECT 'status:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +DROP DATABASE test_01676; diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference new file mode 100644 index 00000000000..459ca166dc1 --- /dev/null +++ b/tests/queries/0_stateless/01676_reinterpret_as.reference @@ -0,0 +1,41 @@ +Into String +1 +Into FixedString +1 +1\0 +1\0\0 +1\0\0\0 +1 +Into Numeric Representable +Integer and Integer types +1 1 +1 1 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +Integer and Float types +1045220557 1045220557 +4596373779694328218 4596373779694328218 +0.2 1045220557 +0.2 4596373779694328218 +Integer and String types +1 1 49 +1 1 49 +11 11 12593 +Dates +1970-01-01 
1970-01-01 +1970-01-01 03:00:00 1970-01-01 03:00:00 +1970-01-01 03:00:00.000 1970-01-01 03:00:00.000 +Decimals +5.00 0.49 +5.00 0.49 +5.00 0.49 +5.00 0.49 +0.00 +ReinterpretErrors diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql new file mode 100644 index 00000000000..5eb94ed0a13 --- /dev/null +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -0,0 +1,42 @@ +SELECT 'Into String'; +SELECT reinterpret(49, 'String'); +SELECT 'Into FixedString'; +SELECT reinterpret(49, 'FixedString(1)'); +SELECT reinterpret(49, 'FixedString(2)'); +SELECT reinterpret(49, 'FixedString(3)'); +SELECT reinterpret(49, 'FixedString(4)'); +SELECT reinterpretAsFixedString(49); +SELECT 'Into Numeric Representable'; +SELECT 'Integer and Integer types'; +SELECT reinterpret(257, 'UInt8'), reinterpretAsUInt8(257); +SELECT reinterpret(257, 'Int8'), reinterpretAsInt8(257); +SELECT reinterpret(257, 'UInt16'), reinterpretAsUInt16(257); +SELECT reinterpret(257, 'Int16'), reinterpretAsInt16(257); +SELECT reinterpret(257, 'UInt32'), reinterpretAsUInt32(257); +SELECT reinterpret(257, 'Int32'), reinterpretAsInt32(257); +SELECT reinterpret(257, 'UInt64'), reinterpretAsUInt64(257); +SELECT reinterpret(257, 'Int64'), reinterpretAsInt64(257); +SELECT reinterpret(257, 'Int128'), reinterpretAsInt128(257); +SELECT reinterpret(257, 'UInt256'), reinterpretAsUInt256(257); +SELECT reinterpret(257, 'Int256'), reinterpretAsInt256(257); +SELECT 'Integer and Float types'; +SELECT reinterpret(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2)); +SELECT reinterpret(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2)); +SELECT reinterpretAsFloat32(a), reinterpretAsUInt32(toFloat32(0.2)) as a; +SELECT reinterpretAsFloat64(a), reinterpretAsUInt64(toFloat64(0.2)) as a; +SELECT 'Integer and String types'; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; +SELECT 'Dates'; +SELECT reinterpret(0, 'Date'), reinterpret('', 'Date'); +SELECT reinterpret(0, 'DateTime'), reinterpret('', 'DateTime'); +SELECT reinterpret(0, 'DateTime64'), reinterpret('', 'DateTime64'); +SELECT 'Decimals'; +SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)'); +SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');; +SELECT reinterpret(toDecimal128(5, 2), 'Decimal128(2)'), reinterpret('1', 'Decimal128(2)'); +SELECT reinterpret(toDecimal256(5, 2), 'Decimal256(2)'), reinterpret('1', 'Decimal256(2)'); +SELECT reinterpret(toDateTime64(0, 0), 'Decimal64(2)'); +SELECT 'ReinterpretErrors'; +SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43} diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.reference b/tests/queries/0_stateless/01676_round_int_ubsan.reference new file mode 100644 index 00000000000..cec4825deb9 --- /dev/null +++ b/tests/queries/0_stateless/01676_round_int_ubsan.reference @@ -0,0 +1 @@ +9223372036854775700 diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.sql b/tests/queries/0_stateless/01676_round_int_ubsan.sql new file mode 100644 index 00000000000..45aa5706a05 --- /dev/null +++ b/tests/queries/0_stateless/01676_round_int_ubsan.sql @@ -0,0 +1,6 @@ +-- Overflow during integer rounding is implementation specific behaviour. 
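+-- For example (illustrative only): round(-9223372036854775808, -2) currently returns
+-- 9223372036854775700 (see the reference file); what matters here is only that some value
+-- is returned without tripping UBSan, not the exact wrapped result.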
+-- This test makes it apparent when the implementation changes. +-- Changing the implementation-specific behaviour is OK +-- and should not be treated as an incompatibility (simply update the test result then). + +SELECT round(-9223372036854775808, -2); diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.reference b/tests/queries/0_stateless/01677_array_enumerate_bug.reference new file mode 100644 index 00000000000..9c0e526801f --- /dev/null +++ b/tests/queries/0_stateless/01677_array_enumerate_bug.reference @@ -0,0 +1,2 @@ +[1,1,2] +[1,1,1] diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.sql b/tests/queries/0_stateless/01677_array_enumerate_bug.sql new file mode 100644 index 00000000000..0db0c51fe5b --- /dev/null +++ b/tests/queries/0_stateless/01677_array_enumerate_bug.sql @@ -0,0 +1,13 @@ +-- There was a bug: a missing check of the total size of keys in the case of a hash table with a 128-bit key. + +SELECT arrayEnumerateUniq(arrayEnumerateUniq([toInt256(10), toInt256(100), toInt256(2)]), [toInt256(123), toInt256(1023), toInt256(123)]); + +SELECT arrayEnumerateUniq( + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666]); diff --git a/tests/queries/0_stateless/01677_bit_float.reference b/tests/queries/0_stateless/01677_bit_float.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01677_bit_float.sql b/tests/queries/0_stateless/01677_bit_float.sql new file mode 100644 index 00000000000..3692d8ac6a5 --- /dev/null +++ b/tests/queries/0_stateless/01677_bit_float.sql @@ -0,0 +1,9 @@ +SELECT bitAnd(0, inf); -- { serverError 43 } +SELECT bitXor(0, inf); -- { serverError 43 } +SELECT bitOr(0, inf); -- { serverError 43 } +SELECT bitTest(inf, 0); -- { serverError 43 } +SELECT bitTest(0, inf); -- { serverError 43 } +SELECT bitRotateLeft(inf, 0); -- { serverError 43 } +SELECT bitRotateRight(inf, 0); -- { serverError 43 } +SELECT bitShiftLeft(inf, 0); -- { serverError 43 } +SELECT bitShiftRight(inf, 0); -- { serverError 43 } diff --git a/tests/queries/0_stateless/01678_great_circle_angle.reference b/tests/queries/0_stateless/01678_great_circle_angle.reference new file mode 100644 index 00000000000..0373970e3bd --- /dev/null +++ b/tests/queries/0_stateless/01678_great_circle_angle.reference @@ -0,0 +1,5 @@ +0.1224 +0.7071 +0.7135 +10007554 +10007554 diff --git a/tests/queries/0_stateless/01678_great_circle_angle.sql b/tests/queries/0_stateless/01678_great_circle_angle.sql new file mode 100644 index 00000000000..124c7bfadf2 --- /dev/null +++ b/tests/queries/0_stateless/01678_great_circle_angle.sql @@ -0,0 +1,6 @@ +SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4); + +SELECT round(greatCircleDistance(0, 0, 0, 90), 4); +SELECT round(greatCircleDistance(0, 0, 90, 0), 4); diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference new file mode 100644 index 00000000000..5446cd475b0 --- /dev/null +++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference @@ -0,0 +1,3 @@ +inf +-inf +nan diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql new file mode 100644
index 00000000000..ac92dec2bee --- /dev/null +++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql @@ -0,0 +1 @@ +SELECT formatReadableTimeDelta(arrayJoin([inf, -inf, nan])); diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh new file mode 100755 index 00000000000..e8d89c2e45a --- /dev/null +++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS collapsing_merge_tree" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE collapsing_merge_tree (key UInt32, sign Int8, date Datetime) ENGINE=CollapsingMergeTree(sign) PARTITION BY date ORDER BY key" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO collapsing_merge_tree VALUES (1, -117, '2020-01-01')" 2>&1 | grep -q 'Incorrect data: Sign = -117' && echo 'OK' || echo 'FAIL'; + +${CLICKHOUSE_CLIENT} --query "DROP TABLE collapsing_merge_tree;" + diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.reference b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.sql b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql new file mode 100644 index 00000000000..f4690116e1a --- /dev/null +++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql @@ -0,0 +1,2 @@ +SELECT DISTINCT result FROM (SELECT toStartOfFifteenMinutes(toDateTime(toStartOfFifteenMinutes(toDateTime(1000.0001220703125) + (number * 65536))) + (number * 9223372036854775807)) AS result FROM system.numbers LIMIT 1048576) ORDER BY result DESC NULLS FIRST FORMAT Null; +SELECT round(round(round(round(round(100)), round(round(round(round(NULL), round(65535)), toTypeName(now() + 9223372036854775807) LIKE 'DateTime%DateTime%DateTime%DateTime%', round(-2)), 255), round(NULL)))); diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql new file mode 100644 index 00000000000..181e7109cd4 --- /dev/null +++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql @@ -0,0 +1 @@ +SELECT count() FROM (SELECT 2000 AS d_year UNION DISTINCT SELECT 2000 AS d_year) WHERE d_year = 2002 diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference new file mode 100644 index 00000000000..75a0b4104b3 --- /dev/null +++ 
b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference @@ -0,0 +1 @@ +0 0 2 diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql new file mode 100644 index 00000000000..b0aab898536 --- /dev/null +++ b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql @@ -0,0 +1 @@ +SELECT bitAnd(number, toUInt64(pow(257, 20) - 1048576)) AS k, argMaxIf(k, if((number % 255) = 256, toInt256(65535), number), number > 42), uniq(number) AS u FROM numbers(2) GROUP BY toInt256(-2, NULL), k; diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference new file mode 100644 index 00000000000..2616e6c2a5c --- /dev/null +++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference @@ -0,0 +1,10 @@ +NullableTuple with transform_null_in=0 +NullableTuple with transform_null_in=1 +NullableColumnFromCast with transform_null_in=0 +1 test +NullableColumnFromCast with transform_null_in=1 +1 test +NullableColumnFromTable with transform_null_in=0 +1 test +NullableColumnFromTable with transform_null_in=1 +1 test diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql new file mode 100644 index 00000000000..4af1f74fca6 --- /dev/null +++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS bloom_filter_nullable_index; +CREATE TABLE bloom_filter_nullable_index + ( + order_key UInt64, + str Nullable(String), + + INDEX idx (str) TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree() + ORDER BY order_key SETTINGS index_granularity = 6; + +INSERT INTO bloom_filter_nullable_index VALUES (1, 'test'); +INSERT INTO bloom_filter_nullable_index VALUES (2, 'test2'); + +SELECT 'NullableTuple with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0; + +SELECT 'NullableTuple with transform_null_in=1'; + +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 } + +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 } + + +SELECT 'NullableColumnFromCast with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 0; + +SELECT 'NullableColumnFromCast with transform_null_in=1'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 1; + +DROP TABLE IF EXISTS nullable_string_value; +CREATE TABLE nullable_string_value (value Nullable(String)) ENGINE=TinyLog; +INSERT INTO nullable_string_value VALUES ('test'); + +SELECT 'NullableColumnFromTable with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT value FROM nullable_string_value) SETTINGS transform_null_in = 0; + +SELECT 'NullableColumnFromTable with transform_null_in=1'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT value FROM nullable_string_value) SETTINGS 
transform_null_in = 1; + +DROP TABLE nullable_string_value; +DROP TABLE bloom_filter_nullable_index; diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh new file mode 100755 index 00000000000..0bbf8942c1a --- /dev/null +++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We throw our own exception from operator new. +# In previous versions of Hyperscan it triggered debug assertion as it only expected std::bad_alloc. + +M=1000000 + +while true +do + $CLICKHOUSE_CLIENT --allow_hyperscan 1 --max_memory_usage $M --format Null --query " + SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[a${RANDOM}e]b[ei]rl', 'ф[иа${RANDOM}эе]б[еэи][рпл]', 'афиукд', 'a[f${RANDOM}t],th', '^ф[аие${RANDOM}э]?б?[еэи]?$', 'бе${RANDOM}рлик', 'fa${RANDOM}b', 'фа[беьв]+е?[рл${RANDOM}ко]'])) + " 2>&1 | grep -q 'Memory limit' || break; + + M=$((M + 100000)) +done + +echo 'Ok' diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.reference b/tests/queries/0_stateless/01682_gather_utils_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.sql b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql new file mode 100644 index 00000000000..2388586e8fe --- /dev/null +++ b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql @@ -0,0 +1 @@ +SELECT arrayResize([1, 2, 3], -9223372036854775808); -- { serverError 128 } diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference new file mode 100644 index 00000000000..be589c9ceb0 --- /dev/null +++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference @@ -0,0 +1,4 @@ +1 +1 +2 +2 diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql new file mode 100644 index 00000000000..eaf15ed9fd8 --- /dev/null +++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS tmp_01683; +DROP TABLE IF EXISTS dist_01683; + +SET prefer_localhost_replica=0; +-- To suppress "Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done." 
+SET send_logs_level='error'; + +CREATE TABLE tmp_01683 (n Int8) ENGINE=Memory; +CREATE TABLE dist_01683 (n UInt64) Engine=Distributed(test_cluster_two_shards, currentDatabase(), tmp_01683, n); + +SET insert_distributed_sync=1; +INSERT INTO dist_01683 VALUES (1),(2); + +SET insert_distributed_sync=0; +INSERT INTO dist_01683 VALUES (1),(2); +SYSTEM FLUSH DISTRIBUTED dist_01683; + +-- TODO: cover distributed_directory_monitor_batch_inserts=1 + +SELECT * FROM tmp_01683 ORDER BY n; + +DROP TABLE tmp_01683; +DROP TABLE dist_01683; diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.reference b/tests/queries/0_stateless/01683_intdiv_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.sql b/tests/queries/0_stateless/01683_intdiv_ubsan.sql new file mode 100644 index 00000000000..adac2505745 --- /dev/null +++ b/tests/queries/0_stateless/01683_intdiv_ubsan.sql @@ -0,0 +1 @@ +SELECT DISTINCT intDiv(number, nan) FROM numbers(10); -- { serverError 153 } diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference new file mode 100644 index 00000000000..76de47c4a46 --- /dev/null +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -0,0 +1 @@ +queries 25000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh new file mode 100755 index 00000000000..ee772bffa27 --- /dev/null +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 25000' diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.reference b/tests/queries/0_stateless/01684_geohash_ubsan.reference new file mode 100644 index 00000000000..2ae4be53dea --- /dev/null +++ b/tests/queries/0_stateless/01684_geohash_ubsan.reference @@ -0,0 +1 @@ +['ypzpgxczgpyr'] diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.sql b/tests/queries/0_stateless/01684_geohash_ubsan.sql new file mode 100644 index 00000000000..e7eb9c526b4 --- /dev/null +++ b/tests/queries/0_stateless/01684_geohash_ubsan.sql @@ -0,0 +1 @@ +SELECT geohashesInBox(100.0000991821289, 100.0000991821289, 1000.0001220703125, 1000.0001220703125, 0); diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.reference b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference new file mode 100644 index 00000000000..1cd79d87135 --- /dev/null +++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference @@ -0,0 +1,120 @@ +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +10 +10 +11 +11 +12 +12 +13 +13 +14 +14 +15 +15 +16 +16 +17 +17 +18 +18 +19 +19 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +10 +10 +11 +11 +12 +12 +13 +13 +14 +14 +15 +15 +16 +16 +17 +17 +18 +18 +19 +19 diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.sql b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql new file mode 100644 index 00000000000..ce1c7807b59 --- /dev/null +++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS x; +DROP TABLE IF EXISTS x_dist; +DROP TABLE IF EXISTS y; 
+DROP TABLE IF EXISTS y_dist; + +CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number; +CREATE TABLE y AS system.numbers ENGINE = MergeTree ORDER BY number; + +CREATE TABLE x_dist as x ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), x); +CREATE TABLE y_dist as y ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), y); + +-- insert into first shard +INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 1; +INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 1; + +SELECT * FROM x_dist ORDER by number; +SELECT * FROM y_dist ORDER by number; + +-- insert into second shard +INSERT INTO x_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2; +INSERT INTO y_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2; + +SELECT * FROM x_dist ORDER by number; +SELECT * FROM y_dist ORDER by number; + +-- no sharding key +INSERT INTO x_dist SELECT * FROM numbers(10); -- { serverError 55 } +INSERT INTO y_dist SELECT * FROM numbers(10); -- { serverError 55 } + +-- invalid shard id +INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 } +INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 } + +DROP TABLE x; +DROP TABLE x_dist; +DROP TABLE y; +DROP TABLE y_dist; diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.reference b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference new file mode 100644 index 00000000000..f3f4206b425 --- /dev/null +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference @@ -0,0 +1,10 @@ +1.1 1.1 1.1 1.1 +0.01 0.01 0.01 0.01 +0 +\N +-1e300 +-inf +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.sql b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql new file mode 100644 index 00000000000..c9aa2518085 --- /dev/null +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql @@ -0,0 +1,24 @@ +WITH '{ "v":1.1}' AS raw +SELECT + JSONExtract(raw, 'v', 'float') AS float32_1, + JSONExtract(raw, 'v', 'Float32') AS float32_2, + JSONExtractFloat(raw, 'v') AS float64_1, + JSONExtract(raw, 'v', 'double') AS float64_2; + +WITH '{ "v":1E-2}' AS raw +SELECT + JSONExtract(raw, 'v', 'float') AS float32_1, + JSONExtract(raw, 'v', 'Float32') AS float32_2, + JSONExtractFloat(raw, 'v') AS float64_1, + JSONExtract(raw, 'v', 'double') AS float64_2; + +SELECT JSONExtract('{"v":1.1}', 'v', 'UInt64'); +SELECT JSONExtract('{"v":1.1}', 'v', 'Nullable(UInt64)'); + +SELECT JSONExtract('{"v":-1e300}', 'v', 'Float64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Float32'); + +SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Int64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt8'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Int8'); diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference new file mode 100644 index 00000000000..9766475a418 --- /dev/null +++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference @@ -0,0 +1 @@ +ok diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql new file mode 100644 index 00000000000..a1b419527d4 --- /dev/null +++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql @@ -0,0 
+1,23 @@ +DROP TABLE IF EXISTS table_with_single_pk; + +CREATE TABLE table_with_single_pk +( + key UInt8, + value String +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000); + +SYSTEM FLUSH LOGS; + +WITH ( + SELECT (event_time, event_time_microseconds) + FROM system.part_log + ORDER BY event_time DESC + LIMIT 1 + ) AS time +SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail'); + +DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01686_rocksdb.reference b/tests/queries/0_stateless/01686_rocksdb.reference new file mode 100644 index 00000000000..fa4e12d51ff --- /dev/null +++ b/tests/queries/0_stateless/01686_rocksdb.reference @@ -0,0 +1,15 @@ +123 Hello, world (123) +-- +-- +123 Hello, world (123) +4567 Hello, world (4567) +-- +-- +0 Hello, world (0) +-- +123 Hello, world (123) +456 Hello, world (456) +-- +99 Hello, world (99) +999 Hello, world (999) +9999 Hello, world (9999) diff --git a/tests/queries/0_stateless/01686_rocksdb.sql b/tests/queries/0_stateless/01686_rocksdb.sql new file mode 100644 index 00000000000..c9b133acff3 --- /dev/null +++ b/tests/queries/0_stateless/01686_rocksdb.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key); + +INSERT INTO test SELECT number, format('Hello, world ({})', toString(number)) FROM numbers(10000); + +SELECT * FROM test WHERE key = 123; +SELECT '--'; +SELECT * FROM test WHERE key = -123; +SELECT '--'; +SELECT * FROM test WHERE key = 123 OR key = 4567 ORDER BY key; +SELECT '--'; +SELECT * FROM test WHERE key = NULL; +SELECT '--'; +SELECT * FROM test WHERE key = NULL OR key = 0; +SELECT '--'; +SELECT * FROM test WHERE key IN (123, 456, -123) ORDER BY key; +SELECT '--'; +SELECT * FROM test WHERE key = 'Hello'; -- { serverError 53 } + +DETACH TABLE test NO DELAY; +ATTACH TABLE test; + +SELECT * FROM test WHERE key IN (99, 999, 9999, -123) ORDER BY key; + +DROP TABLE IF EXISTS test; + diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference new file mode 100644 index 00000000000..b3c946cbaec --- /dev/null +++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference @@ -0,0 +1,2 @@ +[0] +[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql new file mode 100644 index 00000000000..b2a5ab61efc --- /dev/null +++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql @@ -0,0 +1,31 @@ +-- NOTE: that due to overflows it may give different result before +-- quantilesTimingWeighted() had been converted to double: +-- +-- Before: +-- +-- SELECT quantilesTimingWeighted(1)(number, 9223372036854775807) +-- FROM numbers(2) +-- +-- ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐ +-- │ [1] │ +-- └─────────────────────────────────────────────────────────┘ +-- +-- After: +-- +-- SELECT quantilesTimingWeighted(1)(number, 9223372036854775807) +-- FROM numbers(2) +-- +-- ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐ +-- │ [0] │ +-- └─────────────────────────────────────────────────────────┘ + +SELECT quantilesTimingWeighted(0.1)(number, 9223372036854775807) FROM numbers(2); + +-- same UB, but in the inner loop +SELECT quantilesTimingWeighted(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 
0.999, 1)(number, 9223372036854775807) +FROM +( + SELECT number + FROM system.numbers + LIMIT 100 +); diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference new file mode 100644 index 00000000000..3adc9a17e5c --- /dev/null +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -0,0 +1,17 @@ +-- { echo } +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +1970-01-01 03:00:00 +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +2020-01-01 00:00:00 +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +2106-02-07 09:00:00.00 +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql new file mode 100644 index 00000000000..92d5a33328f --- /dev/null +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql @@ -0,0 +1,10 @@ +-- { echo } +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.reference b/tests/queries/0_stateless/01691_parser_data_type_exponential.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.sh b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh new file mode 100755 index 00000000000..2b1d34982a2 --- /dev/null +++ b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Check that DataType parser does not have exponential complexity in the case found by fuzzer. 
+for _ in {1..10}; do ${CLICKHOUSE_CLIENT} -n --testmode --query "SELECT CAST(1 AS A2222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222220000000000000000000000000000000000000000000000000000000000000000000000000000002260637443813394204 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggre222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 22222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 2222222222222eFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 
222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222, 222222222222222ggregateFuncpion(groupBitmap222222222222222222222222222222222222222222222222222222222222222222222222000000000000000000001788596394540167623 222222222222222222ggregateFu22222222222222222222222222 222222222, UInt33)); -- { clientError 62 }"; done diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference new file mode 100644 index 00000000000..a0562e40027 --- /dev/null +++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference @@ -0,0 +1,9 @@ +-- { echo } +select toDateTime64(toDateTime(1), 2); +1970-01-01 03:00:01.00 +select toDateTime64(toDate(1), 2); +1970-01-02 00:00:00.00 +select toDateTime64(toDateTime(1), 2, 'GMT'); +1970-01-01 00:00:01.00 +select toDateTime64(toDate(1), 2, 'GMT'); +1970-01-02 00:00:00.00 diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql new file mode 100644 index 00000000000..60f76e9192c --- /dev/null +++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql @@ -0,0 +1,5 @@ +-- { echo } +select toDateTime64(toDateTime(1), 2); +select toDateTime64(toDate(1), 2); +select toDateTime64(toDateTime(1), 2, 'GMT'); +select toDateTime64(toDate(1), 2, 'GMT'); diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.reference b/tests/queries/0_stateless/01698_map_populate_overflow.reference new file mode 100644 index 00000000000..24e0038125a --- /dev/null +++ b/tests/queries/0_stateless/01698_map_populate_overflow.reference @@ -0,0 +1 @@ +([18446744073709551615],[0]) diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.sql b/tests/queries/0_stateless/01698_map_populate_overflow.sql new file mode 100644 index 00000000000..90c47ff3949 --- /dev/null +++ b/tests/queries/0_stateless/01698_map_populate_overflow.sql @@ -0,0 +1,2 @@ +SELECT mapPopulateSeries([0xFFFFFFFFFFFFFFFF], [0], 0xFFFFFFFFFFFFFFFF); +SELECT mapPopulateSeries([toUInt64(1)], [1], 0xFFFFFFFFFFFFFFFF); -- { serverError 128 } diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference new file mode 100644 index 00000000000..e70c5fa62ee --- /dev/null +++ 
b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -0,0 +1,183 @@ +DST boundary test for Europe/Moscow: +0 1981-04-01 22:40:00 10800 355002000 +1 1981-04-01 22:50:00 10800 355002600 +2 1981-04-02 00:00:00 14400 355003200 +3 1981-04-02 00:10:00 14400 355003800 +0 1981-09-30 23:00:00 14400 370724400 +1 1981-09-30 23:10:00 14400 370725000 +2 1981-09-30 23:20:00 14400 370725600 +3 1981-09-30 23:30:00 14400 370726200 +4 1981-09-30 23:40:00 14400 370726800 +5 1981-09-30 23:50:00 14400 370727400 +6 1981-09-30 23:00:00 10800 370728000 +7 1981-09-30 23:10:00 10800 370728600 +8 1981-09-30 23:20:00 10800 370729200 +9 1981-09-30 23:30:00 10800 370729800 +10 1981-09-30 23:40:00 10800 370730400 +11 1981-09-30 23:50:00 10800 370731000 +12 1981-10-01 00:00:00 10800 370731600 +13 1981-10-01 00:10:00 10800 370732200 +14 1981-10-01 00:20:00 10800 370732800 +15 1981-10-01 00:30:00 10800 370733400 +16 1981-10-01 00:40:00 10800 370734000 +17 1981-10-01 00:50:00 10800 370734600 +DST boundary test for Asia/Tehran: +0 2020-03-21 22:40:00 12600 1584817800 +1 2020-03-21 22:50:00 12600 1584818400 +2 2020-03-22 00:00:00 16200 1584819000 +3 2020-03-22 00:10:00 16200 1584819600 +0 2020-09-20 23:00:00 16200 1600626600 +1 2020-09-20 23:10:00 16200 1600627200 +2 2020-09-20 23:20:00 16200 1600627800 +3 2020-09-20 23:30:00 16200 1600628400 +4 2020-09-20 23:40:00 16200 1600629000 +5 2020-09-20 23:50:00 16200 1600629600 +6 2020-09-20 23:00:00 12600 1600630200 +7 2020-09-20 23:10:00 12600 1600630800 +8 2020-09-20 23:20:00 12600 1600631400 +9 2020-09-20 23:30:00 12600 1600632000 +10 2020-09-20 23:40:00 12600 1600632600 +11 2020-09-20 23:50:00 12600 1600633200 +12 2020-09-21 00:00:00 12600 1600633800 +13 2020-09-21 00:10:00 12600 1600634400 +14 2020-09-21 00:20:00 12600 1600635000 +15 2020-09-21 00:30:00 12600 1600635600 +16 2020-09-21 00:40:00 12600 1600636200 +17 2020-09-21 00:50:00 12600 1600636800 +DST boundary test for Australia/Lord_Howe. 
This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour +37800 +39600 +DST boundary test for Australia/Lord_Howe: +0 2020-10-04 01:40:00 37800 1601737800 +1 2020-10-04 01:50:00 37800 1601738400 +2 2020-10-04 02:00:00 39600 1601739000 +3 2020-10-04 02:10:00 39600 1601739600 +0 2019-04-07 01:00:00 39600 1554559200 +1 2019-04-07 01:10:00 39600 1554559800 +2 2019-04-07 01:20:00 39600 1554560400 +3 2019-04-07 01:30:00 39600 1554561000 +4 2019-04-07 01:40:00 39600 1554561600 +5 2019-04-07 01:50:00 39600 1554562200 +6 2019-04-07 01:00:00 37800 1554562800 +7 2019-04-07 01:10:00 37800 1554563400 +8 2019-04-07 01:20:00 37800 1554564000 +9 2019-04-07 02:30:00 37800 1554564600 +10 2019-04-07 02:40:00 37800 1554565200 +11 2019-04-07 02:50:00 37800 1554565800 +12 2019-04-07 02:00:00 37800 1554566400 +13 2019-04-07 02:10:00 37800 1554567000 +14 2019-04-07 02:20:00 37800 1554567600 +15 2019-04-07 03:30:00 37800 1554568200 +16 2019-04-07 03:40:00 37800 1554568800 +17 2019-04-07 03:50:00 37800 1554569400 +4 days test in batch comparing with manually computation result for Europe/Moscow: +4 days test in batch comparing with manually computation result for Asia/Tehran: +The result maybe wrong for toDateTime processing Australia/Lord_Howe +1601739000 2020-10-04 02:00:00 39600 37800 +1601739600 2020-10-04 02:10:00 39600 37800 +1601740200 2020-10-04 02:20:00 39600 37800 +1601740800 2020-10-04 03:30:00 39600 41400 +1601741400 2020-10-04 03:40:00 39600 41400 +1601742000 2020-10-04 03:50:00 39600 41400 +1601742600 2020-10-04 03:00:00 39600 37800 +1601743200 2020-10-04 03:10:00 39600 37800 +1601743800 2020-10-04 03:20:00 39600 37800 +1601744400 2020-10-04 04:30:00 39600 41400 +1601745000 2020-10-04 04:40:00 39600 41400 +1601745600 2020-10-04 04:50:00 39600 41400 +1601746200 2020-10-04 04:00:00 39600 37800 +1601746800 2020-10-04 04:10:00 39600 37800 +1601747400 2020-10-04 04:20:00 39600 37800 +1601748000 2020-10-04 05:30:00 39600 41400 +1554562800 2019-04-07 01:00:00 37800 36000 +1554563400 2019-04-07 01:10:00 37800 36000 +1554564000 2019-04-07 01:20:00 37800 36000 +1554564600 2019-04-07 02:30:00 37800 39600 +1554565200 2019-04-07 02:40:00 37800 39600 +1554565800 2019-04-07 02:50:00 37800 39600 +1554566400 2019-04-07 02:00:00 37800 36000 +1554567000 2019-04-07 02:10:00 37800 36000 +1554567600 2019-04-07 02:20:00 37800 36000 +1554568200 2019-04-07 03:30:00 37800 39600 +1554568800 2019-04-07 03:40:00 37800 39600 +1554569400 2019-04-07 03:50:00 37800 39600 +Moscow DST Years: +11 1981-06-01 00:00:00 14400 +12 1982-06-01 00:00:00 14400 +13 1983-06-01 00:00:00 14400 +14 1984-06-01 00:00:00 14400 +15 1985-06-01 00:00:00 14400 +16 1986-06-01 00:00:00 14400 +17 1987-06-01 00:00:00 14400 +18 1988-06-01 00:00:00 14400 +19 1989-06-01 00:00:00 14400 +20 1990-06-01 00:00:00 14400 +22 1992-06-01 00:00:00 14400 +23 1993-06-01 00:00:00 14400 +24 1994-06-01 00:00:00 14400 +25 1995-06-01 00:00:00 14400 +26 1996-06-01 00:00:00 14400 +27 1997-06-01 00:00:00 14400 +28 1998-06-01 00:00:00 14400 +29 1999-06-01 00:00:00 14400 +30 2000-06-01 00:00:00 14400 +31 2001-06-01 00:00:00 14400 +32 2002-06-01 00:00:00 14400 +33 2003-06-01 00:00:00 14400 +34 2004-06-01 00:00:00 14400 +35 2005-06-01 00:00:00 14400 +36 2006-06-01 00:00:00 14400 +37 2007-06-01 00:00:00 14400 +38 2008-06-01 00:00:00 14400 +39 2009-06-01 00:00:00 14400 +40 2010-06-01 00:00:00 14400 +41 2011-06-01 00:00:00 14400 +42 2012-06-01 00:00:00 14400 +43 2013-06-01 00:00:00 14400 +44 2014-06-01 00:00:00 14400 +Moscow DST Years with perment 
DST from 2011-2014: +2011-01-01 00:00:00 2011-03-27 00:00:00 86 2011_10800 +2011-03-28 00:00:00 2011-12-31 00:00:00 279 2011_14400 +2012-01-01 00:00:00 2012-12-31 00:00:00 366 2012_14400 +2013-01-01 00:00:00 2013-12-31 00:00:00 365 2013_14400 +2014-01-01 00:00:00 2014-10-26 00:00:00 299 2014_14400 +2014-10-27 00:00:00 2014-12-31 00:00:00 66 2014_10800 +Tehran DST Years: +8 1978-06-01 00:00:00 18000 +9 1979-06-01 00:00:00 16200 +10 1980-06-01 00:00:00 16200 +21 1991-06-01 00:00:00 16200 +22 1992-06-01 00:00:00 16200 +23 1993-06-01 00:00:00 16200 +24 1994-06-01 00:00:00 16200 +25 1995-06-01 00:00:00 16200 +26 1996-06-01 00:00:00 16200 +27 1997-06-01 00:00:00 16200 +28 1998-06-01 00:00:00 16200 +29 1999-06-01 00:00:00 16200 +30 2000-06-01 00:00:00 16200 +31 2001-06-01 00:00:00 16200 +32 2002-06-01 00:00:00 16200 +33 2003-06-01 00:00:00 16200 +34 2004-06-01 00:00:00 16200 +35 2005-06-01 00:00:00 16200 +38 2008-06-01 00:00:00 16200 +39 2009-06-01 00:00:00 16200 +40 2010-06-01 00:00:00 16200 +41 2011-06-01 00:00:00 16200 +42 2012-06-01 00:00:00 16200 +43 2013-06-01 00:00:00 16200 +44 2014-06-01 00:00:00 16200 +45 2015-06-01 00:00:00 16200 +46 2016-06-01 00:00:00 16200 +47 2017-06-01 00:00:00 16200 +48 2018-06-01 00:00:00 16200 +49 2019-06-01 00:00:00 16200 +50 2020-06-01 00:00:00 16200 +Shanghai DST Years: +16 1986-08-01 00:00:00 32400 +17 1987-08-01 00:00:00 32400 +18 1988-08-01 00:00:00 32400 +19 1989-08-01 00:00:00 32400 +20 1990-08-01 00:00:00 32400 +21 1991-08-01 00:00:00 32400 diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql new file mode 100644 index 00000000000..1b3f05ecdd7 --- /dev/null +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -0,0 +1,46 @@ + +/* Test the DST(daylight saving time) offset changing boundary*/ +SELECT 'DST boundary test for Europe/Moscow:'; +SELECT number,(toDateTime('1981-04-01 22:40:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('1981-09-30 23:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + +SELECT 'DST boundary test for Asia/Tehran:'; +SELECT number,(toDateTime('2020-03-21 22:40:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('2020-09-20 23:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + +SELECT 'DST boundary test for Australia/Lord_Howe. This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour'; +SELECT timezoneOffset(toDateTime('2018-08-21 22:20:00', 'Australia/Lord_Howe')); +SELECT timezoneOffset(toDateTime('2018-02-21 22:20:00', 'Australia/Lord_Howe')); + +SELECT 'DST boundary test for Australia/Lord_Howe:'; +SELECT number,(toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + + +/* The Batch Part. 
The test period is 4 whole days. */
+SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+/* During this test we got an unexpected result from the toDateTime() function when processing the special time zone 'Australia/Lord_Howe', which may indicate a bug. */
+SELECT 'The result maybe wrong for toDateTime processing Australia/Lord_Howe';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+
+
+/* Find all the years that observed DST during the given period. */
+
+SELECT 'Moscow DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Europe/Moscow') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800;
+SELECT 'Moscow DST Years with perment DST from 2011-2014:';
+SELECT min((toDateTime('2011-01-01 00:00:00', 'Europe/Moscow') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start;
+
+SELECT 'Tehran DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Tehran') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 12600;
+SELECT 'Shanghai DST Years:';
+SELECT number, (toDateTime('1970-08-01 00:00:00', 'Asia/Shanghai') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 28800;
+
diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
new file mode 100644
index 00000000000..be5b176c627
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -0,0 +1,9 @@
+2
+6
+7
+7
+7
+5
+2
+2.25
+6.5
diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql
new file mode 100644
index 00000000000..93edb2e477d
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.sql
@@ -0,0 +1,9 @@
+select deltaSum(arrayJoin([1, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3,
3, 3, 3, 3, 4, 2, 3])); +select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3])); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0, 1])) as rows union all select deltaSumState(arrayJoin([4, 5])) as rows); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([4, 5])) as rows union all select deltaSumState(arrayJoin([0, 1])) as rows); +select deltaSum(arrayJoin([2.25, 3, 4.5])); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0.1, 0.3, 0.5])) as rows union all select deltaSumState(arrayJoin([4.1, 5.1, 6.6])) as rows); diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference new file mode 100644 index 00000000000..b3a93b2203a --- /dev/null +++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference @@ -0,0 +1,9 @@ +-199 Int16 +-199 Int32 +97 Int64 +17 Int64 +-199 Int32 +9 UInt8 +199 UInt16 +-199 Float64 +-199 Float64 diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql new file mode 100644 index 00000000000..93bb7071659 --- /dev/null +++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql @@ -0,0 +1,12 @@ +SELECT toInt32(-199) % 200 as k, toTypeName(k); +SELECT toInt32(-199) % toUInt16(200) as k, toTypeName(k); +SELECT toInt32(-199) % toUInt32(200) as k, toTypeName(k); +SELECT toInt32(-199) % toUInt64(200) as k, toTypeName(k); + +SELECT toInt32(-199) % toInt16(-200) as k, toTypeName(k); + +SELECT 199 % -10 as k, toTypeName(k); +SELECT 199 % -200 as k, toTypeName(k); + +SELECT toFloat64(-199) % 200 as k, toTypeName(k); +SELECT -199 % toFloat64(200) as k, toTypeName(k); diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql new file mode 100644 index 00000000000..97db40ab65e --- /dev/null +++ b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql @@ -0,0 +1 @@ +SELECT pointInPolygon((0, 0), [[(0, 0), (10, 10), (256, -9223372036854775808)]]) FORMAT Null; diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference new file mode 100644 index 00000000000..2fc177c812e --- /dev/null +++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference @@ -0,0 +1,16 @@ +block_numbers +blocks +1 +======== +block_numbers +blocks +1 +======== +block_numbers +blocks +======== +1 +failed_parts +last_part +leader_election-0000000000 +parallel diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql new file mode 100644 index 00000000000..d4126098c7c --- /dev/null +++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS sample_table; + +CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/01700_system_zookeeper_path_in', '1') +ORDER BY tuple(); + +SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in' AND name like 'block%' ORDER BY name; +SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in/replicas' ORDER BY name; +SELECT 
'========'; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in') AND name LIKE 'block%' ORDER BY name; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in/replicas') ORDER BY name; +SELECT '========'; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in','/clickhouse/01700_system_zookeeper_path_in/replicas') AND name LIKE 'block%' ORDER BY name; +SELECT '========'; +SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/01700_system_zookeeper_path_in/', name) FROM system.zookeeper WHERE (path = '/clickhouse/01700_system_zookeeper_path_in')) ORDER BY name; + +DROP TABLE IF EXISTS sample_table; diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.reference b/tests/queries/0_stateless/01701_if_tuple_segfault.reference new file mode 100644 index 00000000000..001e50da954 --- /dev/null +++ b/tests/queries/0_stateless/01701_if_tuple_segfault.reference @@ -0,0 +1,3 @@ +2020-10-01 19:20:30 hello ([0],45) 45 ([0,1,2,3,4,5,6,7,8,9,10,11,12],[45,55,65,75,85,95,105,115,125,135,145,155,165]) +([3],4) +2020-10-01 19:20:30 hello ([0],45) 5 ([0,1,2,3,4,5,6,7,8,9,10,11,12],[22,27,32,37,42,47,52,57,62,67,72,77,82]) diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.sql b/tests/queries/0_stateless/01701_if_tuple_segfault.sql new file mode 100644 index 00000000000..93b28c578a9 --- /dev/null +++ b/tests/queries/0_stateless/01701_if_tuple_segfault.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS agg_table; + +CREATE TABLE IF NOT EXISTS agg_table +( + time DateTime CODEC(DoubleDelta, LZ4), + xxx String, + two_values Tuple(Array(UInt16), UInt32), + agg_simple SimpleAggregateFunction(sum, UInt64), + agg SimpleAggregateFunction(sumMap, Tuple(Array(Int16), Array(UInt64))) +) +ENGINE = AggregatingMergeTree() +ORDER BY (xxx, time); + +INSERT INTO agg_table SELECT toDateTime('2020-10-01 19:20:30'), 'hello', ([any(number)], sum(number)), sum(number), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) FROM numbers(10); + +SELECT * FROM agg_table; + +SELECT if(xxx = 'x', ([2], 3), ([3], 4)) FROM agg_table; + +SELECT if(xxx = 'x', ([2], 3), ([3], 4, 'q', 'w', 7)) FROM agg_table; --{ serverError 386 } + +ALTER TABLE agg_table UPDATE two_values = (two_values.1, two_values.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg_simple = 5 WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg = (agg.1, agg.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg = (agg.1, arrayMap(x -> toUInt64(x / 2), agg.2)) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +SELECT * FROM agg_table; + +DROP TABLE IF EXISTS agg_table; diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference @@ -0,0 +1 @@ +Ok. 
diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh new file mode 100755 index 00000000000..d3e634eb560 --- /dev/null +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; + +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: \ No newline at end of file diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.reference b/tests/queries/0_stateless/01702_bitmap_native_integers.reference new file mode 100644 index 00000000000..5be3912b8d5 --- /dev/null +++ b/tests/queries/0_stateless/01702_bitmap_native_integers.reference @@ -0,0 +1 @@ +251 65531 4294967291 18446744073709551611 255 65535 4294967295 18446744073709551615 diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.sql b/tests/queries/0_stateless/01702_bitmap_native_integers.sql new file mode 100644 index 00000000000..a31de25dc30 --- /dev/null +++ b/tests/queries/0_stateless/01702_bitmap_native_integers.sql @@ -0,0 +1,5 @@ +drop table if exists t; +create table t(i8 Int8, i16 Int16, i32 Int32, i64 Int64) engine Memory; +insert into t values (-1, -1, -1, -1), (-2, -2, -2, -2), (-3, -3, -3, -3), (-4, -4, -4, -4), (-5, -5, -5, -5); +select * apply bitmapMin, * apply bitmapMax from (select * apply groupBitmapState from t); +drop table t; diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference new file mode 100644 index 00000000000..2bdcedba90e --- /dev/null +++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference @@ -0,0 +1,23 @@ +SELECT avg(number + 2) FROM numbers(10) +value: 6.5 +EXPLAIN syntax: +SELECT avg(number) + 2 +FROM numbers(10) + +SELECT avg(number - 2) FROM numbers(10) +value: 2.5 +EXPLAIN syntax: +SELECT avg(number) - 2 +FROM numbers(10) + +SELECT avg(number * 2) FROM numbers(10) +value: 9 +EXPLAIN syntax: +SELECT avg(number) * 2 +FROM numbers(10) + +SELECT avg(number / 2) FROM numbers(10) +value: 2.25 +EXPLAIN syntax: +SELECT avg(number) / 2 +FROM numbers(10) diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql new file mode 100644 index 00000000000..8fa4dd0ae47 --- /dev/null +++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql @@ -0,0 +1,22 @@ +SELECT 'SELECT avg(number + 2) FROM numbers(10)'; +SELECT 'value: ', avg(number + 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number + 2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number - 2) FROM numbers(10)'; +SELECT 'value: ', avg(number - 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number - 
2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number * 2) FROM numbers(10)'; +SELECT 'value: ', avg(number * 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number * 2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number / 2) FROM numbers(10)'; +SELECT 'value: ', avg(number / 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number / 2) FROM numbers(10); diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql new file mode 100644 index 00000000000..6e037ee4a2e --- /dev/null +++ b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql @@ -0,0 +1,5 @@ +select * from numbers(1e2) format Null; +select * from numbers_mt(1e2) format Null; +select * from numbers_mt('100') format Null; -- { serverError 43 } +select * from numbers_mt(inf) format Null; -- { serverError 43 } +select * from numbers_mt(nan) format Null; -- { serverError 43 } diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference new file mode 100644 index 00000000000..228086615da --- /dev/null +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference @@ -0,0 +1,9 @@ +-- { echo } +SELECT toString(toDateTime('-922337203.6854775808', 1)); +2106-02-07 15:41:33.6 +SELECT toString(toDateTime('9922337203.6854775808', 1)); +2104-12-30 00:50:11.6 +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1); +2106-02-07 20:50:08.1 +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1); +2011-12-23 00:38:08.1 diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql new file mode 100644 index 00000000000..d1f0416149a --- /dev/null +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql @@ -0,0 +1,5 @@ +-- { echo } +SELECT toString(toDateTime('-922337203.6854775808', 1)); +SELECT toString(toDateTime('9922337203.6854775808', 1)); +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1); +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1); diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference new file mode 100644 index 00000000000..e92a057f8c3 --- /dev/null +++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference @@ -0,0 +1,6 @@ +22.5 +SELECT sum(number) / 2 +FROM numbers(10) +22.5 +SELECT sum(number) / 2 +FROM numbers(10) diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql new file mode 100644 index 00000000000..1c5271b4717 --- /dev/null +++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql @@ -0,0 +1,6 @@ +SELECT sum(number / 2) FROM numbers(10); +EXPLAIN SYNTAX SELECT sum(number / 2) FROM numbers(10); + + +SELECT Sum(number / 2) FROM numbers(10); +EXPLAIN SYNTAX SELECT Sum(number / 2) FROM numbers(10); diff --git 
a/tests/queries/0_stateless/01704_transform_with_float_key.reference b/tests/queries/0_stateless/01704_transform_with_float_key.reference new file mode 100644 index 00000000000..761e15c903c --- /dev/null +++ b/tests/queries/0_stateless/01704_transform_with_float_key.reference @@ -0,0 +1,30 @@ +- +Hello +- +World +- +- +- +- +- +- +- +- +Hello +- +World +- +- +- +- +- +- +- +Hello +- +World +- +- +- +- +- diff --git a/tests/queries/0_stateless/01704_transform_with_float_key.sql b/tests/queries/0_stateless/01704_transform_with_float_key.sql new file mode 100644 index 00000000000..690c73ee28a --- /dev/null +++ b/tests/queries/0_stateless/01704_transform_with_float_key.sql @@ -0,0 +1,3 @@ +SELECT transform(number / 2, [0.5, 1.5], ['Hello', 'World'], '-') FROM numbers(10); +SELECT transform(number / 2, [1.0, 2.0], ['Hello', 'World'], '-') FROM numbers(10); +SELECT transform(number / 2, [1, 2], ['Hello', 'World'], '-') FROM numbers(10); diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference new file mode 100644 index 00000000000..5b0f7bdeb2d --- /dev/null +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference @@ -0,0 +1,66 @@ +SELECT + CAST(1, 'INT'), + ceil(1), + ceil(1), + char(49), + CHAR_LENGTH('1'), + CHARACTER_LENGTH('1'), + coalesce(1), + concat('1', '1'), + corr(1, 1), + cos(1), + count(), + covarPop(1, 1), + covarSamp(1, 1), + DATABASE(), + dateDiff('DAY', toDate('2020-10-24'), toDate('2019-10-24')), + exp(1), + arrayFlatten([[1]]), + floor(1), + FQDN(), + greatest(1), + 1, + ifNull(1, 1), + lower('A'), + least(1), + length('1'), + log(1), + position('1', '1'), + log(1), + log10(1), + log2(1), + lower('A'), + max(1), + substring('123', 1, 1), + min(1), + 1 % 1, + NOT 1, + now(), + now64(), + nullIf(1, 1), + pi(), + position('123', '2'), + pow(1, 1), + pow(1, 1), + rand(), + replaceAll('1', '1', '2'), + reverse('123'), + round(1), + sin(1), + sqrt(1), + stddevPop(1), + stddevSamp(1), + substring('123', 2), + substring('123', 2), + count(), + tan(1), + tanh(1), + trunc(1), + trunc(1), + upper('A'), + upper('A'), + currentUser(), + varPop(1), + varSamp(1), + toWeek(toDate('2020-10-24')), + toYearWeek(toDate('2020-10-24')) diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql new file mode 100644 index 00000000000..9b35087182c --- /dev/null +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql @@ -0,0 +1 @@ +EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), 
YEARWEEK(toDate('2020-10-24')) format TSVRaw; diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference new file mode 100644 index 00000000000..0343ad84abb --- /dev/null +++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference @@ -0,0 +1,6 @@ +SELECT + count(), + count(), + count(), + count(), + count(NULL) diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql new file mode 100644 index 00000000000..d20f23feef8 --- /dev/null +++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql @@ -0,0 +1,4 @@ + +set optimize_normalize_count_variants = 1; + +explain syntax select count(), count(1), count(-1), sum(1), count(null); diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql new file mode 100644 index 00000000000..fad890c4807 --- /dev/null +++ b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql @@ -0,0 +1,12 @@ +drop table if exists x; + +create table x (i int) engine MergeTree order by i settings old_parts_lifetime = 10000000000, min_bytes_for_wide_part = 0, inactive_parts_to_throw_insert = 1; + +insert into x values (1); +insert into x values (2); + +optimize table x final; + +insert into x values (3); -- { serverError 252; } + +drop table if exists x; diff --git a/tests/queries/0_stateless/01710_join_use_nulls.reference b/tests/queries/0_stateless/01710_join_use_nulls.reference new file mode 100644 index 00000000000..8bd111e0416 --- /dev/null +++ b/tests/queries/0_stateless/01710_join_use_nulls.reference @@ -0,0 +1,3 @@ +3 +1 +1 diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01710_join_use_nulls.sql new file mode 100644 index 00000000000..b024227d4e2 --- /dev/null +++ b/tests/queries/0_stateless/01710_join_use_nulls.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS X; +DROP TABLE IF EXISTS Y; + +CREATE TABLE X (id Int) ENGINE=Memory; +CREATE TABLE Y (id Int) ENGINE=Memory; + +-- Type mismatch of columns to JOIN by: plus(id, 1) Int64 at left, Y.id Int32 at right. +SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = Y.id SETTINGS join_use_nulls=1; -- { serverError 53 } +SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = toInt64(Y.id) SETTINGS join_use_nulls=1; + +-- Logical error: 'Arguments of 'plus' have incorrect data types: '2' of type 'UInt8', '1' of type 'UInt8''. 
+-- Because 1 became toNullable(1), i.e.: +-- 2 UInt8 Const(size = 1, UInt8(size = 1)) +-- 1 UInt8 Const(size = 1, Nullable(size = 1, UInt8(size = 1), UInt8(size = 1))) +SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = Y.dummy SETTINGS join_use_nulls = 1; -- { serverError 53 } +SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = toUInt16(Y.dummy) SETTINGS join_use_nulls = 1; +SELECT X.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1; +SELECT Y.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1; + +DROP TABLE X; +DROP TABLE Y; diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.reference b/tests/queries/0_stateless/01711_cte_subquery_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.sql b/tests/queries/0_stateless/01711_cte_subquery_fix.sql new file mode 100644 index 00000000000..ddea548eada --- /dev/null +++ b/tests/queries/0_stateless/01711_cte_subquery_fix.sql @@ -0,0 +1,3 @@ +drop table if exists t; +create table t engine = Memory as with cte as (select * from numbers(10)) select * from cte; +drop table t; diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.reference b/tests/queries/0_stateless/01711_decimal_multiplication.reference new file mode 100644 index 00000000000..37869329ca4 --- /dev/null +++ b/tests/queries/0_stateless/01711_decimal_multiplication.reference @@ -0,0 +1,4 @@ +2.0000 +2.0000 +2.0000 +2.0000 diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.sql b/tests/queries/0_stateless/01711_decimal_multiplication.sql new file mode 100644 index 00000000000..10d23599b4d --- /dev/null +++ b/tests/queries/0_stateless/01711_decimal_multiplication.sql @@ -0,0 +1,4 @@ +SELECT materialize(toDecimal64(4,4)) - materialize(toDecimal32(2,2)); +SELECT toDecimal64(4,4) - materialize(toDecimal32(2,2)); +SELECT materialize(toDecimal64(4,4)) - toDecimal32(2,2); +SELECT toDecimal64(4,4) - toDecimal32(2,2); diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference new file mode 100644 index 00000000000..51acb066394 --- /dev/null +++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference @@ -0,0 +1,6 @@ +1 1 +2 2 +1 1 +2 2 +1 1 +2 2 diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql new file mode 100644 index 00000000000..0acf6992c1e --- /dev/null +++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS old_school_table; + +CREATE TABLE old_school_table +( + key UInt64, + value String +) +ENGINE = MergeTree() +ORDER BY key +SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0, min_bytes_for_wide_part = 0, +vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO old_school_table VALUES (1, '1'); +INSERT INTO old_school_table VALUES (2, '2'); + +OPTIMIZE TABLE old_school_table FINAL; + +SELECT * FROM old_school_table ORDER BY key; + +OPTIMIZE TABLE old_school_table FINAL; -- just to be sure + +SELECT * FROM old_school_table ORDER BY key; + +ALTER TABLE old_school_table MODIFY SETTING vertical_merge_algorithm_min_rows_to_activate = 10000, 
vertical_merge_algorithm_min_columns_to_activate = 10000; + +OPTIMIZE TABLE old_school_table FINAL; -- and horizontal merge + +SELECT * FROM old_school_table ORDER BY key; + +DROP TABLE IF EXISTS old_school_table; diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql new file mode 100644 index 00000000000..7d4c83c9d3a --- /dev/null +++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS ttl_table; + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +TTL date + INTERVAL 2 MONTH; --{ serverError 36 } + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +PARTITION BY date; --{ serverError 42 } + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +ORDER BY value; --{ serverError 42 } + +SELECT 1; + +DROP TABLE IF EXISTS ttl_table; diff --git a/tests/queries/0_stateless/01714_alter_drop_version.reference b/tests/queries/0_stateless/01714_alter_drop_version.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/0_stateless/01714_alter_drop_version.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/01714_alter_drop_version.sql b/tests/queries/0_stateless/01714_alter_drop_version.sql new file mode 100644 index 00000000000..e3d5db33859 --- /dev/null +++ b/tests/queries/0_stateless/01714_alter_drop_version.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS alter_drop_version; + +CREATE TABLE alter_drop_version +( + `key` UInt64, + `value` String, + `ver` Int8 +) +ENGINE = ReplacingMergeTree(ver) +ORDER BY key; + +INSERT INTO alter_drop_version VALUES (1, '1', 1); + +ALTER TABLE alter_drop_version DROP COLUMN ver; --{serverError 524} +ALTER TABLE alter_drop_version RENAME COLUMN ver TO rev; --{serverError 524} + +DETACH TABLE alter_drop_version; + +ATTACH TABLE alter_drop_version; + +SELECT * FROM alter_drop_version; + +DROP TABLE IF EXISTS alter_drop_version; diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql new file mode 100644 index 00000000000..66b53369517 --- /dev/null +++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS i20203_1; +DROP TABLE IF EXISTS i20203_2; + +CREATE TABLE i20203_1 (a Int8) +ENGINE = ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r1') +ORDER BY tuple(); + +CREATE TABLE i20203_2 (a Int8) +ENGINE = 
ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r2') +ORDER BY tuple(); + +DETACH TABLE i20203_2; +INSERT INTO i20203_1 VALUES (2); + +DETACH TABLE i20203_1; +ATTACH TABLE i20203_2; + +-- sleep 10 seconds +SELECT number from numbers(10) where sleepEachRow(1) Format Null; + +SELECT num_tries < 50 +FROM system.replication_queue +WHERE table = 'i20203_2' AND database = currentDatabase(); + +ATTACH TABLE i20203_1; + +DROP TABLE IF EXISTS i20203_1; +DROP TABLE IF EXISTS i20203_2; diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.reference b/tests/queries/0_stateless/01715_table_function_view_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql new file mode 100644 index 00000000000..de5150b7b70 --- /dev/null +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -0,0 +1 @@ +SELECT view(SELECT 1); -- { clientError 62 } diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.reference b/tests/queries/0_stateless/01716_array_difference_overflow.reference new file mode 100644 index 00000000000..5297534679e --- /dev/null +++ b/tests/queries/0_stateless/01716_array_difference_overflow.reference @@ -0,0 +1 @@ +[0,9223372036854710272] diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.sql b/tests/queries/0_stateless/01716_array_difference_overflow.sql new file mode 100644 index 00000000000..3d153725294 --- /dev/null +++ b/tests/queries/0_stateless/01716_array_difference_overflow.sql @@ -0,0 +1,2 @@ +-- Overflow is Ok and behaves as the CPU does it. +SELECT arrayDifference([65536, -9223372036854775808]); diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql new file mode 100644 index 00000000000..f68d9de1995 --- /dev/null +++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql @@ -0,0 +1,2 @@ +SET decimal_check_overflow = 0; +SELECT toDecimal64(0, 8) = 9223372036854775807; diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.reference b/tests/queries/0_stateless/01716_drop_rename_sign_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.sql b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql new file mode 100644 index 00000000000..c9119ee2b46 --- /dev/null +++ b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS signed_table; + +CREATE TABLE signed_table ( + k UInt32, + v String, + s Int8 +) ENGINE CollapsingMergeTree(s) ORDER BY k; + +INSERT INTO signed_table(k, v, s) VALUES (1, 'a', 1); + +ALTER TABLE signed_table DROP COLUMN s; --{serverError 524} +ALTER TABLE signed_table RENAME COLUMN s TO s1; --{serverError 524} + +DROP TABLE IF EXISTS signed_table; diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.reference b/tests/queries/0_stateless/01717_global_with_subquery_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.sql 
b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql new file mode 100644 index 00000000000..14c4ac3e4ca --- /dev/null +++ b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql @@ -0,0 +1 @@ +WITH (SELECT count(distinct colU) from tabA) AS withA, (SELECT count(distinct colU) from tabA) AS withB SELECT withA / withB AS ratio FROM (SELECT date AS period, colX FROM (SELECT date, if(colA IN (SELECT colB FROM tabC), 0, colA) AS colX FROM tabB) AS tempB GROUP BY period, colX) AS main; -- {serverError 60} diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql new file mode 100644 index 00000000000..c4f26a079f0 --- /dev/null +++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql @@ -0,0 +1,2 @@ +SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 } +SELECT intDiv(9223372036854775807, 1.); -- { serverError 153 } diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.reference b/tests/queries/0_stateless/01718_subtract_seconds_date.reference new file mode 100644 index 00000000000..97e3da8cc48 --- /dev/null +++ b/tests/queries/0_stateless/01718_subtract_seconds_date.reference @@ -0,0 +1,2 @@ +2021-02-14 23:59:59 +10 diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.sql b/tests/queries/0_stateless/01718_subtract_seconds_date.sql new file mode 100644 index 00000000000..6bffcd4db5a --- /dev/null +++ b/tests/queries/0_stateless/01718_subtract_seconds_date.sql @@ -0,0 +1,2 @@ +SELECT subtractSeconds(toDate('2021-02-15'), 1); +SELECT subtractSeconds(today(), 1) - subtractSeconds(today(), 11); diff --git a/tests/queries/0_stateless/01719_join_timezone.reference b/tests/queries/0_stateless/01719_join_timezone.reference new file mode 100644 index 00000000000..c2702a38012 --- /dev/null +++ b/tests/queries/0_stateless/01719_join_timezone.reference @@ -0,0 +1,3 @@ +2020-05-13 13:38:45 2020-05-13 16:38:45 +2020-05-13 13:38:45 2020-05-13 16:38:45 +2020-05-13 13:38:45 2020-05-13 16:38:45 diff --git a/tests/queries/0_stateless/01719_join_timezone.sql b/tests/queries/0_stateless/01719_join_timezone.sql new file mode 100644 index 00000000000..cbf0c27fcfc --- /dev/null +++ b/tests/queries/0_stateless/01719_join_timezone.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (timestamp DateTime('UTC'), i UInt8) Engine=MergeTree() PARTITION BY toYYYYMM(timestamp) ORDER BY (i); +INSERT INTO test values ('2020-05-13 16:38:45', 1); + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +LEFT JOIN (SELECT 2 AS x) AS anything ON x = i +WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo'); + +/* This was incorrect result in previous ClickHouse versions: +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 16:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone is ignored. +└─────────────────────┴─────────────────────┘ +*/ + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +-- LEFT JOIN (SELECT 2 AS x) AS anything ON x = i -- Removing the join fixes the issue. 
+WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo'); + +/* +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works. +└─────────────────────┴─────────────────────┘ +*/ + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +LEFT JOIN (SELECT 2 AS x) AS anything ON x = i +WHERE timestamp >= '2020-05-13T00:00:00'; -- Not using toDateTime in the WHERE also fixes the issue. + +/* +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works. +└─────────────────────┴─────────────────────┘ +*/ + +DROP TABLE test; diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference new file mode 100644 index 00000000000..38abe3c9f52 --- /dev/null +++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference @@ -0,0 +1 @@ +1 First diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql new file mode 100644 index 00000000000..a0a4fbbfab9 --- /dev/null +++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql @@ -0,0 +1,28 @@ +DROP DATABASE IF EXISTS 01720_dictionary_db; +CREATE DATABASE 01720_dictionary_db; + +CREATE TABLE 01720_dictionary_db.dictionary_source_table +( + key UInt8, + value String +) +ENGINE = TinyLog; + +INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First'); + +CREATE DICTIONARY 01720_dictionary_db.dictionary +( + key UInt64, + value String +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort())) +LIFETIME(0) +LAYOUT(FLAT()); + +SELECT * FROM 01720_dictionary_db.dictionary; + +DROP DICTIONARY 01720_dictionary_db.dictionary; +DROP TABLE 01720_dictionary_db.dictionary_source_table; + +DROP DATABASE 01720_dictionary_db; diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql new file mode 100644 index 00000000000..d665dbc722f --- /dev/null +++ b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS file_engine_table; + +CREATE TABLE file_engine_table (id UInt32) ENGINE=File(TSV); + +SELECT * FROM file_engine_table; --{ serverError 107 } + +SET engine_file_empty_if_not_exists=0; + +SELECT * FROM file_engine_table; --{ serverError 107 } + +SET engine_file_empty_if_not_exists=1; + +SELECT * FROM file_engine_table; + +SET engine_file_empty_if_not_exists=0; +DROP TABLE file_engine_table; diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.reference b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.sql b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql new file mode 100644 index 00000000000..9fc5b3eafd2 --- /dev/null +++ 
b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql @@ -0,0 +1,8 @@ +SELECT x +FROM +( + SELECT 1 AS x + UNION DISTINCT + SELECT 1 +) +LIMIT 1; diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference new file mode 100644 index 00000000000..066b4bd1d97 --- /dev/null +++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference @@ -0,0 +1,10 @@ +-------- 42 -------- +42 14.0000 14.00000000 14.00000000 14.0000000000000000618637523926765281280 +42 14.0000 14.00000000 14.00000000 +14.0000 14.00000000 14.00000000 +-------- 4999 -------- +4999 1666.3333 1666.33333333 1666.33333333 1633.3553612205046244471093725648757194800 +4999 1666.3333 1666.33333333 1666.33333333 +1666.3333 1666.33333333 1666.33333333 +-------- 5000 -------- +0.1100 0.11000000 0.11000000 diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql new file mode 100644 index 00000000000..0451d455009 --- /dev/null +++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql @@ -0,0 +1,78 @@ +set allow_experimental_bigint_types=1; +drop database if exists db_01721; +drop table if exists db_01721.table_decimal_dict; +drop dictionary if exists db_01721.decimal_dict; + + +create database db_01721; + +CREATE TABLE db_01721.table_decimal_dict( +KeyField UInt64, +Decimal32_ Decimal(5,4), +Decimal64_ Decimal(18,8), +Decimal128_ Decimal(25,8), +Decimal256_ Decimal(76,37) +) +ENGINE = Memory; + +insert into db_01721.table_decimal_dict +select number, + number / 3, + number / 3, + number / 3, + number / 3 +from numbers(5000); + + +CREATE DICTIONARY IF NOT EXISTS db_01721.decimal_dict ( + KeyField UInt64 DEFAULT 9999999, + Decimal32_ Decimal(5,4) DEFAULT 0.11, + Decimal64_ Decimal(18,8) DEFAULT 0.11, + Decimal128_ Decimal(25,8) DEFAULT 0.11 +-- ,Decimal256_ Decimal256(37) DEFAULT 0.11 +) +PRIMARY KEY KeyField +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_decimal_dict' DB 'db_01721')) +LIFETIME(0) LAYOUT(SPARSE_HASHED); + +select '-------- 42 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 42; + +SELECT * from db_01721.decimal_dict where KeyField = 42; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(42)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(42)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(42)) + -- ,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(42)) +; + + +select '-------- 4999 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 4999; + +SELECT * from db_01721.decimal_dict where KeyField = 4999; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(4999)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(4999)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(4999)) + --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(4999)) +; + +select '-------- 5000 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 5000; + +SELECT * from db_01721.decimal_dict where KeyField = 5000; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(5000)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(5000)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(5000)) + --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(5000)) +; + +drop table if exists table_decimal_dict; +drop dictionary if exists cache_dict; +drop database if exists 
db_01721; + diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference new file mode 100644 index 00000000000..578661c9194 --- /dev/null +++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference @@ -0,0 +1,13 @@ +1 +2 +3 +4 +1 +2 +3 +4 +5 +6 +0 +1 +2 diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql new file mode 100644 index 00000000000..079b2546a20 --- /dev/null +++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS test; + +INSERT INTO TABLE FUNCTION file('01721_file/test/data.TSV', 'TSV', 'id UInt32') VALUES (1); +ATTACH TABLE test FROM '01721_file/test' (id UInt8) ENGINE=File(TSV); + +INSERT INTO test VALUES (2), (3); +INSERT INTO test VALUES (4); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=0; + +INSERT INTO test VALUES (5), (6); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=1; + +INSERT INTO test VALUES (0), (1), (2); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=0; +DROP TABLE test; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference new file mode 100644 index 00000000000..02ae8a37e52 --- /dev/null +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference @@ -0,0 +1,20 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql new file mode 100644 index 00000000000..e43b81dca48 --- /dev/null +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -0,0 +1,20 @@ +drop table if exists data_01730; + +-- does not use 127.1 due to prefer_localhost_replica + +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=0, max_memory_usage='100Mi'; -- { serverError 241 } +-- no memory limit error, because with distributed_group_by_no_merge=2 remote servers will do ORDER BY and will cut to the LIMIT +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; + +-- since the MergingSortedTransform will start processing only when all ports (remotes) will have some data, +-- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, +-- so the initiator will first receive all blocks from remotes and only after start merging, +-- and will hit the memory limit. +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; -- { serverError 241 } + +-- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, +-- since they don't need to wait until the aggregation will be finished, +-- and so the query will not hit the memory limit error. 
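+-- Note: optimize_aggregation_in_order can only kick in when the GROUP BY key is a prefix of the table's sorting key,
+-- which is why the query below reads from a MergeTree table ordered by `key` instead of the numbers() view used above.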
+create table data_01730 engine=MergeTree() order by key as select number key from numbers(1e6); +select * from remote('127.{2..11}', currentDatabase(), data_01730) group by key order by key limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi', optimize_aggregation_in_order=1 format Null; +drop table data_01730; diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.reference b/tests/queries/0_stateless/01731_async_task_queue_wait.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh new file mode 100755 index 00000000000..e0babf3c6ff --- /dev/null +++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# regression for 'Empty task was returned from async task queue' during query +# cancellation with async_socket_for_remote=1 (that ignores +# max_distributed_connections) +timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2 --interactive_delay=900000 -q "select number + sleep(0.3) as x from remote('127.{2,3}', system.numbers) settings max_block_size = 2" 2>&1 | grep "Empty task was returned from async task queue" || true diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.reference b/tests/queries/0_stateless/01732_bigint_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.sql b/tests/queries/0_stateless/01732_bigint_ubsan.sql new file mode 100644 index 00000000000..238a5d99d30 --- /dev/null +++ b/tests/queries/0_stateless/01732_bigint_ubsan.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE decimal +( + f dec(38, 38) +); + +INSERT INTO decimal VALUES (0); +INSERT INTO decimal VALUES (0.42); +INSERT INTO decimal VALUES (-0.42); + +SELECT f + 1048575, f - 21, f - 84, f * 21, f * -21, f / 21, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 } +SELECT f + -2, f - 21, f - 84, f * 21, f * -21, f / 9223372036854775807, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 } diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference new file mode 100644 index 00000000000..4f3181ecce0 --- /dev/null +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference @@ -0,0 +1,8 @@ +1 2005-03-18 01:58:31.222 +2 2005-03-18 01:58:31.222 +3 2005-03-18 01:58:31.222 +4 2005-03-18 01:58:31.222 +2005-03-18 04:58:31.222 +2005-03-18 04:58:31.222 +2005-03-18 04:58:31.222 +0 diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql new file mode 100644 index 00000000000..dcd874f8c45 --- /dev/null +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE t (i UInt8, x DateTime64(3, 'UTC')); +INSERT INTO t VALUES (1, 1111111111222); +INSERT INTO t VALUES (2, 1111111111.222); +INSERT INTO t VALUES (3, '1111111111222'); +INSERT INTO t VALUES (4, '1111111111.222'); +SELECT * FROM t ORDER BY i; + +SELECT toDateTime64(1111111111.222, 3); +SELECT toDateTime64('1111111111.222', 3); +SELECT toDateTime64('1111111111222', 3); +SELECT ignore(toDateTime64(1111111111222, 3)); -- 
This gives somewhat correct but unexpected result diff --git a/tests/queries/0_stateless/01733_transform_ubsan.reference b/tests/queries/0_stateless/01733_transform_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql new file mode 100644 index 00000000000..256603e9087 --- /dev/null +++ b/tests/queries/0_stateless/01733_transform_ubsan.sql @@ -0,0 +1,4 @@ +SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') +FROM system.numbers +LIMIT 1025 +FORMAT Null; diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.reference b/tests/queries/0_stateless/01734_datetime64_from_float.reference new file mode 100644 index 00000000000..32e7d2736c6 --- /dev/null +++ b/tests/queries/0_stateless/01734_datetime64_from_float.reference @@ -0,0 +1,7 @@ +-- { echo } +SELECT CAST(1111111111.222 AS DateTime64(3)); +2005-03-18 04:58:31.222 +SELECT toDateTime(1111111111.222, 3); +2005-03-18 04:58:31.222 +SELECT toDateTime64(1111111111.222, 3); +2005-03-18 04:58:31.222 diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql new file mode 100644 index 00000000000..b6be65cb7c2 --- /dev/null +++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql @@ -0,0 +1,4 @@ +-- { echo } +SELECT CAST(1111111111.222 AS DateTime64(3)); +SELECT toDateTime(1111111111.222, 3); +SELECT toDateTime64(1111111111.222, 3); diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 871d429e037..4e523545938 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -189,6 +189,7 @@ 01650_fetch_patition_with_macro_in_zk_path 01651_bugs_from_15889 01655_agg_if_nullable +01658_read_file_to_stringcolumn 01182_materialized_view_different_structure 01660_sum_ubsan 01669_columns_declaration_serde @@ -196,4 +197,13 @@ 01181_db_atomic_drop_on_cluster 01658_test_base64Encode_mysql_compatibility 01659_test_base64Decode_mysql_compatibility +01674_htm_xml_coarse_parse +01675_data_type_coroutine +01676_clickhouse_client_autocomplete 01671_aggregate_function_group_bitmap_data +01674_executable_dictionary_implicit_key +01686_rocksdb +01683_dist_INSERT_block_structure_mismatch +01702_bitmap_native_integers +01686_event_time_microseconds_part_log +01017_uniqCombined_memory_usage diff --git a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py new file mode 100755 index 00000000000..3ed42f1c820 --- /dev/null +++ b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf message. +# To do that this script has been written. 
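+# A length-delimited stream is a sequence of messages, each prefixed with its size in bytes
+# encoded as a base-128 varint (7 bits per byte, least significant group first, high bit set on every byte except the last).
+# For example, a 3-byte message is framed as b'\x03' + payload, while a 300-byte message gets the
+# two-byte prefix b'\xac\x02' (binary 100101100 split into 7-bit groups from the low end: 0101100 -> 0xAC, 10 -> 0x02).
+# read_varint/write_varint below implement this prefix, and protoc is invoked once per framed message with --decode/--encode.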
+ +import argparse +import os.path +import struct +import subprocess +import sys +import tempfile + +def read_varint(input): + res = 0 + shift = 0 + while True: + c = input.read(1) + if len(c) == 0: + return None + b = c[0] + if b < 0x80: + res += b << shift + break + b -= 0x80 + res += b << shift + shift = shift << 7 + return res + +def write_varint(output, value): + while True: + if value < 0x80: + b = value + output.write(b.to_bytes(1, byteorder='little')) + break + b = (value & 0x7F) + 0x80 + output.write(b.to_bytes(1, byteorder='little')) + value = value >> 7 + +def write_hexdump(output, data): + with subprocess.Popen(["hexdump", "-C"], stdin=subprocess.PIPE, stdout=output, shell=False) as proc: + proc.communicate(data) + if proc.returncode != 0: + raise RuntimeError("hexdump returned code " + str(proc.returncode)) + output.flush() + +class FormatSchemaSplitted: + def __init__(self, format_schema): + self.format_schema = format_schema + splitted = self.format_schema.split(':') + if len(splitted) < 2: + raise RuntimeError('The format schema must have the format "schemafile:MessageType"') + path = splitted[0] + self.schemadir = os.path.dirname(path) + self.schemaname = os.path.basename(path) + if not self.schemaname.endswith(".proto"): + self.schemaname = self.schemaname + ".proto" + self.message_type = splitted[1] + +def decode(input, output, format_schema): + if not type(format_schema) is FormatSchemaSplitted: + format_schema = FormatSchemaSplitted(format_schema) + msgindex = 1 + while True: + sz = read_varint(input) + if sz is None: + break + output.write("MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell()).encode()) + output.flush() + msg = input.read(sz) + if len(msg) < sz: + raise EOFError('Unexpected end of file') + with subprocess.Popen(["protoc", + "--decode", format_schema.message_type, format_schema.schemaname], + cwd=format_schema.schemadir, + stdin=subprocess.PIPE, + stdout=output, + shell=False) as proc: + proc.communicate(msg) + if proc.returncode != 0: + raise RuntimeError("protoc returned code " + str(proc.returncode)) + output.flush() + msgindex = msgindex + 1 + +def encode(input, output, format_schema): + if not type(format_schema) is FormatSchemaSplitted: + format_schema = FormatSchemaSplitted(format_schema) + line_offset = input.tell() + line = input.readline() + while True: + if len(line) == 0: + break + if not line.startswith(b"MESSAGE #"): + raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset)) + msg = b"" + while True: + line_offset = input.tell() + line = input.readline() + if line.startswith(b"MESSAGE #") or len(line) == 0: + break + msg += line + with subprocess.Popen(["protoc", + "--encode", format_schema.message_type, format_schema.schemaname], + cwd=format_schema.schemadir, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + shell=False) as proc: + msgbin = proc.communicate(msg)[0] + if proc.returncode != 0: + raise RuntimeError("protoc returned code " + str(proc.returncode)) + write_varint(output, len(msgbin)) + output.write(msgbin) + output.flush() + +def decode_and_check(input, output, format_schema): + input_data = input.read() + output.write(b"Binary representation:\n") + output.flush() + write_hexdump(output, input_data) + output.write(b"\n") + output.flush() + + with tempfile.TemporaryFile() as tmp_input, tempfile.TemporaryFile() as tmp_decoded, tempfile.TemporaryFile() as tmp_encoded: + tmp_input.write(input_data) + tmp_input.flush() + 
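+        # rewind the temporary input so decode() below reads it from the beginning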
tmp_input.seek(0) + decode(tmp_input, tmp_decoded, format_schema) + tmp_decoded.seek(0) + decoded_text = tmp_decoded.read() + output.write(decoded_text) + output.flush() + tmp_decoded.seek(0) + encode(tmp_decoded, tmp_encoded, format_schema) + tmp_encoded.seek(0) + encoded_data = tmp_encoded.read() + + if encoded_data == input_data: + output.write(b"\nBinary representation is as expected\n") + output.flush() + else: + output.write(b"\nBinary representation differs from the expected one (listed below):\n") + output.flush() + write_hexdump(output, encoded_data) + sys.exit(1) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.') + parser.add_argument('--input', help='The input file, the standard input will be used if not specified.') + parser.add_argument('--output', help='The output file, the standard output will be used if not specified') + parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"') + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages.' + 'The utility will read text-format messages of the given type from the input and write it in binary to the output.') + group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages.' + 'The utility will read messages in binary from the input and write text-format messages to the output.') + group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode ' + ' the decoded data back to the binary form to check that the result of that encoding is the same as the input was.') + args = parser.parse_args() + + custom_input_file = None + custom_output_file = None + try: + if args.input: + custom_input_file = open(args.input, "rb") + if args.output: + custom_output_file = open(args.output, "wb") + input = custom_input_file if custom_input_file else sys.stdin.buffer + output = custom_output_file if custom_output_file else sys.stdout.buffer + + if args.encode: + encode(input, output, args.format_schema) + elif args.decode: + decode(input, output, args.format_schema) + elif args.decode_and_check: + decode_and_check(input, output, args.format_schema) + + finally: + if custom_input_file: + custom_input_file.close() + if custom_output_file: + custom_output_file.close() diff --git a/tests/queries/1_stateful/00139_like.sql b/tests/queries/1_stateful/00139_like.sql index ccc195bc81d..8cb84558407 100644 --- a/tests/queries/1_stateful/00139_like.sql +++ b/tests/queries/1_stateful/00139_like.sql @@ -1,4 +1,4 @@ -/* Заметим, что запросы написаны так, как будто пользователь не понимает смысл символа _ в LIKE выражении. */ +/* Note that queries are written as the user doesn't really understand that the symbol _ has special meaning in LIKE pattern. 
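(In a LIKE pattern '_' matches any single character and '%' matches any sequence of characters, so the underscores below act as wildcards rather than literal characters.)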
*/ SELECT count() FROM test.hits WHERE URL LIKE '%/avtomobili_s_probegom/_%__%__%__%'; SELECT count() FROM test.hits WHERE URL LIKE '/avtomobili_s_probegom/_%__%__%__%'; SELECT count() FROM test.hits WHERE URL LIKE '%_/avtomobili_s_probegom/_%__%__%__%'; diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.reference b/tests/queries/1_stateful/00158_cache_dictionary_has.reference index f8d5cd4f53d..ad4bce6bec5 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.reference +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.reference @@ -1,6 +1,6 @@ +100 6410 -6410 -25323 +100 25323 -1774655 +100 1774655 diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 063e7843fd4..8461728c58e 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -6,15 +6,15 @@ CREATE DICTIONARY db_dict.cache_hits PRIMARY KEY WatchID SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) LIFETIME(MIN 300 MAX 600) -LAYOUT(CACHE(SIZE_IN_CELLS 100000 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 350 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; diff --git a/tests/queries/conftest.py b/tests/queries/conftest.py index 2f19ae7c479..40a9a6b3a2e 100644 --- a/tests/queries/conftest.py +++ b/tests/queries/conftest.py @@ -25,6 +25,9 @@ def bin_prefix(cmdopts): prefix = 'clickhouse' if cmdopts['builddir'] is not None: prefix = os.path.join(cmdopts['builddir'], 'programs', prefix) + # FIXME: does this hangs the server start for some reason? 
+ # if not os.path.isabs(prefix): + # prefix = os.path.abspath(prefix) return prefix diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py index c4e7e613175..417a51fe523 100644 --- a/tests/queries/query_test.py +++ b/tests/queries/query_test.py @@ -14,13 +14,10 @@ SKIP_LIST = [ "00987_distributed_stack_overflow", # just fail - "00302_http_compression", - "00463_long_sessions_in_http_interface", "00505_secure", "00505_shard_secure", - "00506_union_distributed", # flaky "00646_url_engine", - "00821_distributed_storage_with_join_on.sql", # flaky + "00725_memory_tracking", # BROKEN "00834_cancel_http_readonly_queries_on_client_close", "00933_test_fix_extra_seek_on_compressed_cache", "00965_logs_level_bugfix", @@ -30,38 +27,34 @@ SKIP_LIST = [ "01014_lazy_database_concurrent_recreate_reattach_and_show_tables", "01018_Distributed__shard_num", "01018_ip_dictionary", - "01023_materialized_view_query_context", # flaky - "01035_lc_empty_part_bug", # flaky - "01037_polygon_dicts_simple_functions.sh", # flaky - "01046_materialized_view_with_join_over_distributed", # flaky "01050_clickhouse_dict_source_with_subquery", "01053_ssd_dictionary", "01054_cache_dictionary_overflow_cell", "01057_http_compression_prefer_brotli", "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", - "01086_odbc_roundtrip", + # "01086_odbc_roundtrip", "01088_benchmark_query_id", "01098_temporary_and_external_tables", - "01099_parallel_distributed_insert_select", # flaky + "01099_parallel_distributed_insert_select", "01103_check_cpu_instructions_at_startup", "01114_database_atomic", "01148_zookeeper_path_macros_unfolding", - "01193_metadata_loading.sh", # flaky - "01274_alter_rename_column_distributed", # flaky + "01181_db_atomic_drop_on_cluster", # tcp port in reference "01280_ssd_complex_key_dictionary", "01293_client_interactive_vertical_multiline", # expect-test "01293_client_interactive_vertical_singleline", # expect-test + "01293_system_distribution_queue", # FLAKY "01293_show_clusters", "01294_lazy_database_concurrent_recreate_reattach_and_show_tables", "01294_system_distributed_on_cluster", "01300_client_save_history_when_terminated", # expect-test "01304_direct_io", "01306_benchmark_json", + "01035_lc_empty_part_bug", # FLAKY "01320_create_sync_race_condition_zookeeper", "01355_CSV_input_format_allow_errors", "01370_client_autocomplete_word_break_characters", # expect-test - "01375_storage_file_tsv_csv_with_names_write_prefix", # flaky "01376_GROUP_BY_injective_elimination_dictGet", "01393_benchmark_secure_port", "01418_custom_settings", @@ -72,6 +65,7 @@ SKIP_LIST = [ "01507_clickhouse_server_start_with_embedded_config", "01514_distributed_cancel_query_on_error", "01520_client_print_query_id", # expect-test + "01526_client_start_and_exit", # expect-test "01527_dist_sharding_key_dictGet_reload", "01545_url_file_format_settings", "01553_datetime64_comparison", @@ -79,17 +73,18 @@ SKIP_LIST = [ "01558_ttest_scipy", "01561_mann_whitney_scipy", "01582_distinct_optimization", - "01586_storage_join_low_cardinality_key", - "01599_multiline_input_and_singleline_comments", - "01600_benchmark_query", + "01599_multiline_input_and_singleline_comments", # expect-test "01601_custom_tld", - "01601_proxy_protocol", + "01610_client_spawn_editor", # expect-test + "01676_clickhouse_client_autocomplete", # expect-test (partially) + "01683_text_log_deadlock", # secure tcp ] def check_result(result, error, return_code, reference, replace_map): - for old, new in replace_map.items(): 
- result = result.replace(old.encode('utf-8'), new.encode('utf-8')) + if replace_map: + for old, new in replace_map.items(): + result = result.replace(old.encode('utf-8'), new.encode('utf-8')) if return_code != 0: try: @@ -106,9 +101,9 @@ def check_result(result, error, return_code, reference, replace_map): pytrace=False) -def run_client(bin_prefix, port, query, reference, replace_map={}): +def run_client(bin_prefix, port, database, query, reference, replace_map=None): # We can't use `text=True` since some tests may return binary data - client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-m', '-n', '--testmode'], + client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-d', database, '-m', '-n', '--testmode'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) result, error = client.communicate(query.encode('utf-8')) assert client.returncode is not None, "Client should exit after processing all queries" @@ -116,12 +111,13 @@ def run_client(bin_prefix, port, query, reference, replace_map={}): check_result(result, error, client.returncode, reference, replace_map) -def run_shell(bin_prefix, server, database, path, reference, replace_map={}): +def run_shell(bin_prefix, server, database, path, reference, replace_map=None): env = { 'CLICKHOUSE_BINARY': bin_prefix, 'CLICKHOUSE_DATABASE': database, 'CLICKHOUSE_PORT_TCP': str(server.tcp_port), 'CLICKHOUSE_PORT_TCP_SECURE': str(server.tcps_port), + 'CLICKHOUSE_PORT_TCP_WITH_PROXY': str(server.proxy_port), 'CLICKHOUSE_PORT_HTTP': str(server.http_port), 'CLICKHOUSE_PORT_INTERSERVER': str(server.inter_port), 'CLICKHOUSE_TMP': server.tmp_dir, @@ -136,6 +132,7 @@ def run_shell(bin_prefix, server, database, path, reference, replace_map={}): def random_str(length=10): alphabet = string.ascii_lowercase + string.digits + random.seed(os.urandom(8)) return ''.join(random.choice(alphabet) for _ in range(length)) @@ -159,17 +156,18 @@ def test_sql_query(bin_prefix, sql_query, standalone_server): reference = file.read() random_name = 'test_{random}'.format(random=random_str()) - query = 'CREATE DATABASE {random}; USE {random}; {query}'.format(random=random_name, query=query) - run_client(bin_prefix, tcp_port, query, reference, {random_name: 'default'}) + run_client(bin_prefix, tcp_port, 'default', 'CREATE DATABASE {random};'.format(random=random_name), b'') + + run_client(bin_prefix, tcp_port, random_name, query, reference, {random_name: 'default'}) query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n') query = 'DROP DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') def test_shell_query(bin_prefix, shell_query, standalone_server): @@ -191,15 +189,15 @@ def test_shell_query(bin_prefix, shell_query, standalone_server): random_name = 'test_{random}'.format(random=random_str()) query = 'CREATE DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') 
run_shell(bin_prefix, standalone_server, random_name, shell_path, reference, {random_name: 'default'}) query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n') query = 'DROP DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') diff --git a/tests/queries/server.py b/tests/queries/server.py index 599de2400e3..ed12931e658 100644 --- a/tests/queries/server.py +++ b/tests/queries/server.py @@ -37,6 +37,7 @@ class ServerThread(threading.Thread): self.tcps_port = port_base + 4 self.https_port = port_base + 5 self.odbc_port = port_base + 6 + self.proxy_port = port_base + 7 self._args = [ '--config-file={config_path}'.format(config_path=self.server_config), @@ -44,6 +45,7 @@ class ServerThread(threading.Thread): '--tcp_port={tcp_port}'.format(tcp_port=self.tcp_port), '--http_port={http_port}'.format(http_port=self.http_port), '--interserver_http_port={inter_port}'.format(inter_port=self.inter_port), + '--tcp_with_proxy_port={proxy_port}'.format(proxy_port=self.proxy_port), # TODO: SSL certificate is not specified '--tcp_port_secure={tcps_port}'.format(tcps_port=self.tcps_port), ] @@ -76,8 +78,8 @@ class ServerThread(threading.Thread): print('Successful server response:', s.recv(1024)) # FIXME: read whole buffered response s.shutdown(socket.SHUT_RDWR) s.close() - except Exception as e: - print('Failed to connect to server:', e, file=sys.stderr) + except Exception: + # Failed to connect to server - try again continue else: break @@ -96,6 +98,10 @@ class ServerThread(threading.Thread): self._lock.release() + if not retries: + print('Failed to start server', file=sys.stderr) + return + while self._proc.returncode is None: self._proc.communicate() @@ -297,6 +303,10 @@ ServerThread.DEFAULT_SERVER_CONFIG = \ testkeeper + + /clickhouse/task_queue/ddl + + system part_log
@@ -1112,6 +1122,136 @@ ServerThread.DEFAULT_DICTIONARIES_CONFIG = \ + + + simple_executable_cache_dictionary_no_implicit_key + + + id + UInt64 + + + + value + String + + + + + + echo "1\tValue" + TabSeparated + false + + + + + 10000 + + + 300 + + + + simple_executable_cache_dictionary_implicit_key + + + id + UInt64 + + + + value + String + + + + + + echo "Value" + TabSeparated + true + + + + + 10000 + + + 300 + + + + complex_executable_cache_dictionary_no_implicit_key + + + + id + UInt64 + + + + id_key + String + + + + + value + String + + + + + + echo "1\tFirstKey\tValue" + TabSeparated + false + + + + + 10000 + + + 300 + + + + complex_executable_cache_dictionary_implicit_key + + + + id + UInt64 + + + + id_key + String + + + + + value + String + + + + + + echo "Value" + TabSeparated + true + + + + + 10000 + + + 300 + """ diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 0ca2cee3c77..d20b5669cc5 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -8,9 +8,12 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL [ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} " [ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} " [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " +[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} " [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' " [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' " export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} @@ -51,14 +54,18 @@ export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:="8123"} export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=https_port 2>/dev/null)} 2>/dev/null export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=mysql_port 2>/dev/null)} 2>/dev/null +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:="9004"} -# Add database to url params +# Add database and log comment to url params if [ -v CLICKHOUSE_URL_PARAMS ] then export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}" else export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}" fi +# Note: missing url encoding of the log comment. 
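+# (So keep CLICKHOUSE_LOG_COMMENT free of characters that would need escaping in a URL, such as spaces, '&' or '#'.)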
+[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}" export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"} export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"} @@ -82,6 +89,17 @@ export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --ma export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} +export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"} +export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"} +# Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp " +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} " +[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} " +[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} " +MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} " +export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}" +export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"} + function clickhouse_client_removed_host_parameter() { # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex. diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 8ed1e890cf1..39ec8bac3cf 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -17,7 +17,8 @@ "functions_bad_arguments", /// Too long for TSan "01603_read_with_backoff_bug", /// Too long for TSan "01646_system_restart_replicas_smoke", /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan - "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy for TSan + "01641_memory_tracking_insert_optimize", /// INSERT lots of rows is too heavy for TSan + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "address-sanitizer": [ "00877", @@ -27,7 +28,8 @@ "01103_check_cpu_instructions_at_startup", "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers - "01193_metadata_loading" + "01193_metadata_loading", + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "ub-sanitizer": [ "capnproto", @@ -48,7 +50,8 @@ "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers - "01193_metadata_loading" + "01193_metadata_loading", + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "debug-build": [ "query_profiler", @@ -90,6 +93,7 @@ "01300_client_save_history_when_terminated", "orc_output", "01370_client_autocomplete_word_break_characters", + "01676_clickhouse_client_autocomplete", "01193_metadata_loading", "01455_time_zones" ], @@ -101,8 +105,165 @@ "00510_materizlized_view_and_deduplication_zookeeper", "00738_lock_for_inner_table" ], + "database-replicated": [ + /// Tests with DETACH TABLE (it's not allowed) + /// and tests with SET (session and query settings are not supported) + "memory_tracking", + "memory_usage", + "live_view", + 
"01413_alter_update_supertype", + "01149_zookeeper_mutation_stuck_after_replace_partition", + "00836_indices_alter_replicated_zookeeper", + "00652_mutations_alter_update", + "01715_tuple_insert_null_as_default", + "00825_protobuf_format_map", + "00152_insert_different_granularity", + "01715_background_checker_blather_zookeeper", + "01714_alter_drop_version", + "01114_materialize_clear_index_compact_parts", + "00814_replicated_minimalistic_part_header_zookeeper", + "01188_attach_table_from_pat", + "01415_sticking_mutations", + "01130_in_memory_parts", + "01110_dictionary_layout_without_arguments", + "01018_ddl_dictionaries_create", + "01018_ddl_dictionaries_select", + "01414_freeze_does_not_prevent_alters", + "01018_ddl_dictionaries_bad_queries", + "01686_rocksdb", + "01550_mutation_subquery", + "01070_mutations_with_dependencies", + "01070_materialize_ttl", + "01055_compact_parts", + "01017_mutations_with_nondeterministic_functions_zookeeper", + "00926_adaptive_index_granularity_pk", + "00910_zookeeper_test_alter_compression_codecs", + "00908_bloom_filter_index", + "00616_final_single_part", + "00446_clear_column_in_partition_zookeeper", + "01533_multiple_nested", + "01213_alter_rename_column_zookeeper", + "01575_disable_detach_table_of_dictionary", + "01457_create_as_table_function_structure", + "01415_inconsistent_merge_tree_settings", + "01413_allow_non_metadata_alters", + "01378_alter_rename_with_ttl_zookeeper", + "01349_mutation_datetime_key", + "01325_freeze_mutation_stuck", + "01272_suspicious_codecs", + "01181_db_atomic_drop_on_cluster", + "00957_delta_diff_bug", + "00910_zookeeper_custom_compression_codecs_replicated", + "00899_long_attach_memory_limit", + "00804_test_custom_compression_codes_log_storages", + "00804_test_alter_compression_codecs", + "00804_test_delta_codec_no_type_alter", + "00804_test_custom_compression_codecs", + "00753_alter_attach", + "00715_fetch_merged_or_mutated_part_zookeeper", + "00688_low_cardinality_serialization", + "01575_disable_detach_table_of_dictionary", + "00738_lock_for_inner_table", + "01666_blns", + "01652_ignore_and_low_cardinality", + "01651_map_functions", + "01650_fetch_patition_with_macro_in_zk_path", + "01648_mutations_and_escaping", + "01640_marks_corruption_regression", + "01622_byte_size", + "01611_string_to_low_cardinality_key_alter", + "01602_show_create_view", + "01600_log_queries_with_extensive_info", + "01560_ttl_remove_empty_parts", + "01554_bloom_filter_index_big_integer_uuid", + "01550_type_map_formats_input", + "01550_type_map_formats", + "01550_create_map_type", + "01532_primary_key_without_order_by_zookeeper", + "01511_alter_version_versioned_collapsing_merge_tree_zookeeper", + "01509_parallel_quorum_insert_no_replicas", + "01504_compression_multiple_streams", + "01494_storage_join_persistency", + "01493_storage_set_persistency", + "01493_alter_remove_properties_zookeeper", + "01475_read_subcolumns_storages", + "01475_read_subcolumns", + "01451_replicated_detach_drop_part", + "01451_detach_drop_part", + "01440_big_int_exotic_casts", + "01430_modify_sample_by_zookeeper", + "01417_freeze_partition_verbose_zookeeper", + "01417_freeze_partition_verbose", + "01396_inactive_replica_cleanup_nodes_zookeeper", + "01375_compact_parts_codecs", + "01357_version_collapsing_attach_detach_zookeeper", + "01355_alter_column_with_order", + "01291_geo_types", + "01270_optimize_skip_unused_shards_low_cardinality", + "01182_materialized_view_different_structure", + "01150_ddl_guard_rwr", + "01148_zookeeper_path_macros_unfolding", + 
"01135_default_and_alter_zookeeper", + "01130_in_memory_parts_partitons", + "01127_month_partitioning_consistency_select", + "01114_database_atomic", + "01083_expressions_in_engine_arguments", + "01073_attach_if_not_exists", + "01072_optimize_skip_unused_shards_const_expr_eval", + "01071_prohibition_secondary_index_with_old_format_merge_tree", + "01062_alter_on_mutataion_zookeeper", + "01060_shutdown_table_after_detach", + "01056_create_table_as", + "01035_avg", + "01021_only_tuple_columns", + "01019_alter_materialized_view_query", + "01019_alter_materialized_view_consistent", + "01019_alter_materialized_view_atomic", + "01015_attach_part", + "00989_parallel_parts_loading", + "00980_zookeeper_merge_tree_alter_settings", + "00980_merge_alter_settings", + "00955_test_final_mark", + "00933_reserved_word", + "00926_zookeeper_adaptive_index_granularity_replicated_merge_tree", + "00926_adaptive_index_granularity_replacing_merge_tree", + "00926_adaptive_index_granularity_merge_tree", + "00925_zookeeper_empty_replicated_merge_tree_optimize_final", + "00800_low_cardinality_distinct_numeric", + "00754_alter_modify_order_by_replicated_zookeeper", + "00751_low_cardinality_nullable_group_by", + "00751_default_databasename_for_view", + "00719_parallel_ddl_table", + "00718_low_cardinaliry_alter", + "00717_low_cardinaliry_distributed_group_by", + "00688_low_cardinality_syntax", + "00688_low_cardinality_nullable_cast", + "00688_low_cardinality_in", + "00652_replicated_mutations_zookeeper", + "00634_rename_view", + "00626_replace_partition_from_table", + "00625_arrays_in_nested", + "00623_replicated_truncate_table_zookeeper", + "00619_union_highlite", + "00599_create_view_with_subquery", + "00571_non_exist_database_when_create_materializ_view", + "00553_buff_exists_materlized_column", + "00516_deduplication_after_drop_partition_zookeeper", + "00508_materialized_view_to", + "00446_clear_column_in_partition_concurrent_zookeeper", + "00423_storage_log_single_thread", + "00311_array_primary_key", + "00236_replicated_drop_on_non_leader_zookeeper", + "00226_zookeeper_deduplication_and_unexpected_parts", + "00215_primary_key_order_zookeeper", + "00180_attach_materialized_view", + "00121_drop_column_zookeeper", + "00116_storage_set", + "00083_create_merge_tree_zookeeper", + "00062_replicated_merge_tree_alter_zookeeper" + ], "polymorphic-parts": [ - "01508_partition_pruning", /// bug, shoud be fixed + "01508_partition_pruning_long", /// bug, shoud be fixed "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed ], "antlr": [ @@ -121,6 +282,12 @@ "00763_create_query_as_table_engine_bug", "00765_sql_compatibility_aliases", "00825_protobuf_format_input", + "00825_protobuf_format_nested_optional", + "00825_protobuf_format_array_3dim", + "00825_protobuf_format_map", + "00825_protobuf_format_array_of_arrays", + "00825_protobuf_format_table_default", + "00825_protobuf_format_enum_mapping", "00826_cross_to_inner_join", "00834_not_between", "00909_kill_not_initialized_query", @@ -151,6 +318,7 @@ "01015_attach_part", "01015_database_bad_tables", "01017_uniqCombined_memory_usage", + "01018_ddl_dictionaries_concurrent_requrests", /// Cannot parse ATTACH DICTIONARY IF NOT EXISTS "01019_alter_materialized_view_atomic", "01019_alter_materialized_view_consistent", "01019_alter_materialized_view_query", @@ -266,7 +434,7 @@ "01501_clickhouse_client_INSERT_exception", "01504_compression_multiple_streams", "01508_explain_header", - "01508_partition_pruning", + "01508_partition_pruning_long", "01509_check_parallel_quorum_inserts", 
"01509_parallel_quorum_and_merge", "01515_mv_and_array_join_optimisation_bag", @@ -280,7 +448,7 @@ "01530_drop_database_atomic_sync", "01532_execute_merges_on_single_replica", "01532_primary_key_without_order_by_zookeeper", - "01541_max_memory_usage_for_user", + "01541_max_memory_usage_for_user_long", "01551_mergetree_read_in_order_spread", "01552_dict_fixedstring", "01554_bloom_filter_index_big_integer_uuid", @@ -310,18 +478,23 @@ "01642_if_nullable_regression", "01643_system_suspend", "01655_plan_optimizations", - "01475_read_subcolumns_storages" + "01475_read_subcolumns_storages", + "01674_clickhouse_client_query_param_cte", + "01666_merge_tree_max_query_limit" ], "parallel": [ /// Pessimistic list of tests which work badly in parallel. /// Probably they need better investigation. "00062_replicated_merge_tree_alter_zookeeper", + "00080_show_tables_and_system_tables", + "00101_materialized_views_and_insert_without_explicit_database", "00109_shard_totals_after_having", "00110_external_sort", "00116_storage_set", "00121_drop_column_zookeeper", "00133_long_shard_memory_tracker_and_exception_safety", + "00158_buffer_and_nonexistent_table", "00180_attach_materialized_view", "00226_zookeeper_deduplication_and_unexpected_parts", "00236_replicated_drop_on_non_leader_zookeeper", @@ -342,13 +515,17 @@ "00571_non_exist_database_when_create_materializ_view", "00575_illegal_column_exception_when_drop_depen_column", "00599_create_view_with_subquery", + "00604_show_create_database", "00612_http_max_query_size", "00619_union_highlite", "00620_optimize_on_nonleader_replica_zookeeper", + "00623_truncate_table", + "00623_truncate_table_throw_exception", "00625_arrays_in_nested", "00626_replace_partition_from_table", "00626_replace_partition_from_table_zookeeper", "00633_materialized_view_and_too_many_parts_zookeeper", + "00643_cast_zookeeper", "00652_mergetree_mutations", "00652_replicated_mutations_zookeeper", "00682_empty_parts_merge", @@ -357,20 +534,28 @@ "00699_materialized_view_mutations", "00701_rollup", "00715_fetch_merged_or_mutated_part_zookeeper", + "00716_allow_ddl", + "00719_parallel_ddl_db", + "00740_database_in_nested_view", + "00741_client_comment_multiline", "00751_default_databasename_for_view", "00753_alter_attach", "00754_alter_modify_column_partitions", "00754_alter_modify_order_by_replicated_zookeeper", "00763_long_lock_buffer_alter_destination_table", + "00800_versatile_storage_join", "00804_test_alter_compression_codecs", "00804_test_custom_compression_codecs", "00804_test_custom_compression_codes_log_storages", "00804_test_delta_codec_compression", + "00815_left_join_on_stepanel", "00834_cancel_http_readonly_queries_on_client_close", "00834_kill_mutation", "00834_kill_mutation_replicated_zookeeper", "00840_long_concurrent_select_and_drop_deadlock", + "00857_global_joinsavel_table_alias", "00899_long_attach_memory_limit", + "00910_buffer_prewhere", "00910_zookeeper_custom_compression_codecs_replicated", "00926_adaptive_index_granularity_merge_tree", "00926_adaptive_index_granularity_pk", @@ -388,49 +573,94 @@ "00988_constraints_replication_zookeeper", "00989_parallel_parts_loading", "00993_system_parts_race_condition_drop_zookeeper", + "01012_show_tables_limit", "01013_sync_replica_timeout_zookeeper", + "01014_lazy_database_basic", "01014_lazy_database_concurrent_recreate_reattach_and_show_tables", "01015_attach_part", + "01015_database_bad_tables", "01018_ddl_dictionaries_concurrent_requrests", "01018_ddl_dictionaries_create", "01018_ddl_dictionaries_select", + 
"01018_ddl_dictionaries_special", + "01018_dictionaries_from_dictionaries", + "01018_ip_dictionary", "01021_only_tuple_columns", + "01023_materialized_view_query_context", "01031_mutations_interpreter_and_context", "01033_dictionaries_lifetime", "01035_concurrent_move_partition_from_table_zookeeper", + "01036_no_superfluous_dict_reload_on_create_database", + "01036_no_superfluous_dict_reload_on_create_database_2", + "01037_polygon_dicts_correctness_all", + "01037_polygon_dicts_correctness_fast", + "01037_polygon_dicts_simple_functions", + "01038_dictionary_lifetime_min_zero_sec", + "01040_dictionary_invalidate_query_switchover_long", + "01041_create_dictionary_if_not_exists", + "01042_system_reload_dictionary_reloads_completely", + "01043_dictionary_attribute_properties_values", + "01045_dictionaries_restrictions", "01045_zookeeper_system_mutations_with_parts_names", + "01048_exists_query", + "01053_drop_database_mat_view", "01053_ssd_dictionary", + "01054_cache_dictionary_bunch_update", + "01054_cache_dictionary_overflow_cell", "01055_compact_parts_1", + "01056_create_table_as", "01060_avro", "01060_shutdown_table_after_detach", + "01069_database_memory", "01070_materialize_ttl", "01070_modify_ttl", "01070_mutations_with_dependencies", "01071_live_view_detach_dependency", "01071_prohibition_secondary_index_with_old_format_merge_tree", "01073_attach_if_not_exists", + "01073_show_tables_not_like", + "01076_cache_dictionary_datarace_exception_ptr", "01076_parallel_alter_replicated_zookeeper", "01079_parallel_alter_add_drop_column_zookeeper", "01079_parallel_alter_detach_table_zookeeper", + "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", + "01084_regexp_empty", "01085_max_distributed_connections_http", "01092_memory_profiler", "01098_temporary_and_external_tables", + "01103_distributed_product_mode_local_column_renames", "01107_atomic_db_detach_attach", "01108_restart_replicas_rename_deadlock_zookeeper", + "01109_exchange_tables", "01110_dictionary_layout_without_arguments", + "01113_local_dictionary_type_conversion", "01114_database_atomic", + "01114_mysql_database_engine_segfault", + "01115_join_with_dictionary", + "01125_dict_ddl_cannot_add_column", "01127_month_partitioning_consistency_select", "01130_in_memory_parts_partitons", "01135_default_and_alter_zookeeper", "01148_zookeeper_path_macros_unfolding", + "01150_ddl_guard_rwr", + "01185_create_or_replace_table", "01190_full_attach_syntax", + "01191_rename_dictionary", + "01192_rename_database_zookeeper", "01193_metadata_loading", "01200_mutations_memory_consumption", + "01224_no_superfluous_dict_reload", + "01225_drop_dictionary_as_table", + "01225_show_create_table_from_dictionary", + "01231_distributed_aggregation_memory_efficient_mix_levels", + "01232_extremes", "01238_http_memory_tracking", "01249_bad_arguments_for_bloom_filter", "01251_dict_is_in_infinite_loop", + "01254_dict_create_without_db", "01254_dict_load_after_detach_attach", + "01257_dictionary_mismatch_types", "01259_dictionary_custom_settings_ddl", "01267_alter_default_key_columns_zookeeper", "01268_dictionary_direct_layout", @@ -444,18 +674,26 @@ "01293_system_distribution_queue", "01294_lazy_database_concurrent", "01294_lazy_database_concurrent_recreate_reattach_and_show_tables", + "01294_system_distributed_on_cluster", + "01296_create_row_policy_in_current_database", + "01297_create_quota", "01305_replica_create_drop_zookeeper", "01307_multiple_leaders_zookeeper", "01318_long_unsuccessful_mutation_zookeeper", 
"01319_manual_write_to_replicas", + "01320_create_sync_race_condition_zookeeper", "01338_long_select_and_alter", "01338_long_select_and_alter_zookeeper", "01355_alter_column_with_order", "01355_ilike", "01357_version_collapsing_attach_detach_zookeeper", "01375_compact_parts_codecs", + "01376_GROUP_BY_injective_elimination_dictGet", "01378_alter_rename_with_ttl_zookeeper", + "01383_remote_ambiguous_column_shard", "01388_clear_all_columns", + "01391_join_on_dict_crash", + "01392_column_resolve", "01396_inactive_replica_cleanup_nodes_zookeeper", "01412_cache_dictionary_race", "01414_mutations_and_errors_zookeeper", @@ -464,20 +702,48 @@ "01417_freeze_partition_verbose", "01417_freeze_partition_verbose_zookeeper", "01430_modify_sample_by_zookeeper", + "01444_create_table_drop_database_race", "01454_storagememory_data_race_challenge", + "01455_rank_correlation_spearman", "01456_modify_column_type_via_add_drop_update", "01457_create_as_table_function_structure", "01459_manual_write_to_replicas", "01460_DistributedFilesToInsert", "01465_ttl_recompression", + "01470_show_databases_like", "01471_calculate_ttl_during_merge", + "01487_distributed_in_not_default_db", "01493_alter_remove_properties_zookeeper", "01493_storage_set_persistency", "01494_storage_join_persistency", + "01501_cache_dictionary_all_fields", + "01507_clickhouse_server_start_with_embedded_config", + "01509_dictionary_preallocate", + "01516_create_table_primary_key", "01516_drop_table_stress", - "01541_max_memory_usage_for_user", - "01646_system_restart_replicas_smoke", // system restart replicas is a global query + "01517_drop_mv_with_inner_table", + "01526_complex_key_dict_direct_layout", + "01527_clickhouse_local_optimize", + "01527_dist_sharding_key_dictGet_reload", + "01530_drop_database_atomic_sync", + "01541_max_memory_usage_for_user_long", + "01542_dictionary_load_exception_race", + "01575_disable_detach_table_of_dictionary", + "01593_concurrent_alter_mutations_kill", + "01593_concurrent_alter_mutations_kill_many_replicas", "01600_count_of_parts_metrics", // tests global system metrics + "01600_detach_permanently", + "01600_log_queries_with_extensive_info", + "01600_multiple_left_join_with_aliases", + "01601_detach_permanently", + "01602_show_create_view", + "01603_rename_overwrite_bug", + "01646_system_restart_replicas_smoke", // system restart replicas is a global query + "01656_test_query_log_factories_info", + "01669_columns_declaration_serde", + "01676_dictget_in_default_expression", + "01700_system_zookeeper_path_in", + "01715_background_checker_blather_zookeeper", "attach", "ddl_dictionaries", "dictionary", @@ -485,6 +751,8 @@ "live_view", "memory_leak", "memory_limit", - "polygon_dicts" // they use an explicitly specified database + "polygon_dicts", // they use an explicitly specified database + "01658_read_file_to_stringcolumn", + "01721_engine_file_truncate_on_insert" // It's ok to execute in parallel but not several instances of the same test. 
] } diff --git a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml index 04a51ad7ec0..124b53bf502 100644 --- a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml +++ b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml @@ -56,7 +56,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/example/docker-compose/docker-compose.yml b/tests/testflows/example/docker-compose/docker-compose.yml index e7e57386dc4..4edb415824f 100644 --- a/tests/testflows/example/docker-compose/docker-compose.yml +++ b/tests/testflows/example/docker-compose/docker-compose.yml @@ -20,7 +20,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/helpers/cluster.py b/tests/testflows/helpers/cluster.py index 3be79132ec3..5e8717e7a8e 100755 --- a/tests/testflows/helpers/cluster.py +++ b/tests/testflows/helpers/cluster.py @@ -26,7 +26,7 @@ class Node(object): def repr(self): return f"Node(name='{self.name}')" - def restart(self, timeout=300, safe=True): + def restart(self, timeout=300, retries=5): """Restart node. """ with self.cluster.lock: @@ -35,15 +35,20 @@ class Node(object): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + if r.exitcode == 0: + break - def start(self, timeout=300, safe=True): + def start(self, timeout=300, retries=5): """Start node. """ - self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + if r.exitcode == 0: + break - - def stop(self, timeout=300, safe=True): + def stop(self, timeout=300, retries=5): """Stop node. """ with self.cluster.lock: @@ -52,7 +57,10 @@ class Node(object): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + if r.exitcode == 0: + break def command(self, *args, **kwargs): return self.cluster.command(self.name, *args, **kwargs) @@ -71,7 +79,7 @@ class ClickHouseNode(Node): continue assert False, "container is not healthy" - def stop(self, timeout=300, safe=True): + def stop(self, timeout=300, safe=True, retries=5): """Stop node. 
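        Retries the docker-compose stop command up to `retries` times until it exits with code 0.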
""" if safe: @@ -89,17 +97,23 @@ class ClickHouseNode(Node): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + if r.exitcode == 0: + break - def start(self, timeout=300, wait_healthy=True): + def start(self, timeout=300, wait_healthy=True, retries=5): """Start node. """ - self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + if r.exitcode == 0: + break if wait_healthy: self.wait_healthy(timeout) - def restart(self, timeout=300, safe=True, wait_healthy=True): + def restart(self, timeout=300, safe=True, wait_healthy=True, retries=5): """Restart node. """ if safe: @@ -117,7 +131,10 @@ class ClickHouseNode(Node): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + if r.exitcode == 0: + break if wait_healthy: self.wait_healthy(timeout) diff --git a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s security_opt: - label:disable - diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index 8efb389a23f..7f9f16e827c 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -270,7 +270,7 @@ def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users. config = create_ldap_users_config_content(*users, config_d_dir=config_d_dir, config_file=config_file) return add_config(config, restart=restart) -def invalid_server_config(servers, message=None, tail=13, timeout=60): +def invalid_server_config(servers, message=None, tail=30, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP servers configuration file. 
""" node = current().context.node @@ -299,7 +299,7 @@ def invalid_server_config(servers, message=None, tail=13, timeout=60): with By("removing the config file", description=config.path): node.command(f"rm -rf {config.path}", exitcode=0) -def invalid_user_config(servers, config, message=None, tail=13, timeout=60): +def invalid_user_config(servers, config, message=None, tail=30, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP users configuration file. """ node = current().context.node diff --git a/tests/testflows/ldap/authentication/tests/server_config.py b/tests/testflows/ldap/authentication/tests/server_config.py index 38ec859226b..4053b5f61ed 100644 --- a/tests/testflows/ldap/authentication/tests/server_config.py +++ b/tests/testflows/ldap/authentication/tests/server_config.py @@ -245,7 +245,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20): }} with When("I try to use this configuration then it should not work"): - invalid_server_config(servers, message=error_message, tail=17, timeout=timeout) + invalid_server_config(servers, message=error_message, tail=30, timeout=timeout) @TestScenario @Requirements( diff --git a/tests/testflows/ldap/authentication/tests/user_config.py b/tests/testflows/ldap/authentication/tests/user_config.py index 36ed33ed17a..0f296ea31c6 100644 --- a/tests/testflows/ldap/authentication/tests/user_config.py +++ b/tests/testflows/ldap/authentication/tests/user_config.py @@ -39,7 +39,7 @@ def empty_server_name(self, timeout=20): "message": "DB::Exception: user1: Authentication failed: password is incorrect or there is no user with such name" }] config = create_ldap_users_config_content(*users) - invalid_user_config(servers, config, message=message, tail=15, timeout=timeout) + invalid_user_config(servers, config, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( @@ -147,7 +147,7 @@ def ldap_and_password(self): error_message = "DB::Exception: More than one field of 'password'" with Then("I expect an error when I try to load the configuration file", description=error_message): - invalid_user_config(servers, new_config, message=error_message, tail=16) + invalid_user_config(servers, new_config, message=error_message, tail=30) @TestFeature @Name("user config") diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s 
security_opt: - label:disable - diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index e5980640721..23a8d68be0d 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -133,7 +133,7 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d return Config(content, path, name, uid, "config.xml") -def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=60, config=None): +def invalid_ldap_external_user_directory_config(server, roles, message, tail=30, timeout=60, config=None): """Check that ClickHouse errors when trying to load invalid LDAP external user directory configuration file. """ diff --git a/tests/testflows/ldap/external_user_directory/tests/server_config.py b/tests/testflows/ldap/external_user_directory/tests/server_config.py index 4e2e586f77c..8d0d1db976a 100644 --- a/tests/testflows/ldap/external_user_directory/tests/server_config.py +++ b/tests/testflows/ldap/external_user_directory/tests/server_config.py @@ -41,7 +41,7 @@ def invalid_host(self): RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0") ) -def empty_host(self, tail=20, timeout=60): +def empty_host(self, tail=30, timeout=60): """Check that server returns an error when LDAP server host value is empty. """ @@ -50,14 +50,14 @@ def empty_host(self, tail=20, timeout=60): servers = {"foo": {"host": "", "port": "389", "enable_tls": "no"}} - invalid_server_config(servers, message=message, tail=16, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0") ) -def missing_host(self, tail=20, timeout=60): +def missing_host(self, tail=30, timeout=60): """Check that server returns an error when LDAP server host is missing. 
""" @@ -148,7 +148,7 @@ def invalid_enable_tls_value(self, timeout=60): servers = {"openldap1": {"host": "openldap1", "port": "389", "enable_tls": "foo", "auth_dn_prefix": "cn=", "auth_dn_suffix": ",ou=users,dc=company,dc=com" }} - invalid_server_config(servers, message=message, tail=18, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( @@ -259,7 +259,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20): }} with When("I try to use this configuration then it should not work"): - invalid_server_config(servers, message=error_message, tail=17, timeout=timeout) + invalid_server_config(servers, message=error_message, tail=30, timeout=timeout) @TestScenario @Requirements( diff --git a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s security_opt: - label:disable - diff --git a/tests/testflows/ldap/role_mapping/regression.py b/tests/testflows/ldap/role_mapping/regression.py index fff1e72a945..7afb6c98713 100755 --- a/tests/testflows/ldap/role_mapping/regression.py +++ b/tests/testflows/ldap/role_mapping/regression.py @@ -18,7 +18,7 @@ xfails = { @Name("role mapping") @ArgumentParser(argparser) @Specifications( - QA_SRS014_ClickHouse_LDAP_Role_Mapping + SRS_014_ClickHouse_LDAP_Role_Mapping ) @Requirements( RQ_SRS_014_LDAP_RoleMapping("1.0") diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.md b/tests/testflows/ldap/role_mapping/requirements/requirements.md new file mode 100644 index 00000000000..e79baa9cd7c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.md @@ -0,0 +1,504 @@ +# SRS-014 ClickHouse LDAP Role Mapping +# Software Requirements Specification + +## Table of Contents + +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) + * 3.1 [LDAP](#ldap) +* 4 [Requirements](#requirements) + * 4.1 [General](#general) + * 4.1.1 [RQ.SRS-014.LDAP.RoleMapping](#rqsrs-014ldaprolemapping) + * 4.1.2 [RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles](#rqsrs-014ldaprolemappingwithfixedroles) + * 4.1.3 [RQ.SRS-014.LDAP.RoleMapping.Search](#rqsrs-014ldaprolemappingsearch) + * 4.2 [Mapped Role Names](#mapped-role-names) + * 4.2.1 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters](#rqsrs-014ldaprolemappingmaprolenamewithutf8characters) + * 4.2.2 
[RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long](#rqsrs-014ldaprolemappingmaprolenamelong) + * 4.2.3 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialxmlcharacters) + * 4.2.4 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialregexcharacters) + * 4.3 [Multiple Roles](#multiple-roles) + * 4.3.1 [RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles](#rqsrs-014ldaprolemappingmapmultipleroles) + * 4.4 [LDAP Groups](#ldap-groups) + * 4.4.1 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed](#rqsrs-014ldaprolemappingldapgroupremoved) + * 4.4.2 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupremovedandaddedparallel) + * 4.4.3 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved](#rqsrs-014ldaprolemappingldapgroupuserremoved) + * 4.4.4 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupuserremovedandaddedparallel) + * 4.5 [RBAC Roles](#rbac-roles) + * 4.5.1 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent](#rqsrs-014ldaprolemappingrbacrolenotpresent) + * 4.5.2 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added](#rqsrs-014ldaprolemappingrbacroleadded) + * 4.5.3 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed](#rqsrs-014ldaprolemappingrbacroleremoved) + * 4.5.4 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded](#rqsrs-014ldaprolemappingrbacrolereadded) + * 4.5.5 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingrbacroleremovedandaddedparallel) + * 4.5.6 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New](#rqsrs-014ldaprolemappingrbacrolenew) + * 4.5.7 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege](#rqsrs-014ldaprolemappingrbacrolenewprivilege) + * 4.5.8 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege](#rqsrs-014ldaprolemappingrbacroleremovedprivilege) + * 4.6 [Authentication](#authentication) + * 4.6.1 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel](#rqsrs-014ldaprolemappingauthenticationparallel) + * 4.6.2 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid](#rqsrs-014ldaprolemappingauthenticationparallelvalidandinvalid) + * 4.6.3 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers](#rqsrs-014ldaprolemappingauthenticationparallelmultipleservers) + * 4.6.4 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly](#rqsrs-014ldaprolemappingauthenticationparallellocalonly) + * 4.6.5 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP](#rqsrs-014ldaprolemappingauthenticationparallellocalandmultipleldap) + * 4.6.6 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser](#rqsrs-014ldaprolemappingauthenticationparallelsameuser) + * 4.7 [Server Configuration](#server-configuration) + * 4.7.1 [BindDN Parameter](#binddn-parameter) + * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn) + * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn) + * 4.8 [External User Directory Configuration](#external-user-directory-configuration) + * 4.8.1 [Syntax](#syntax) + * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax) + * 4.8.2 [Special Characters Escaping](#special-characters-escaping) + * 4.8.2.1 
[RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingspecialcharactersescaping) + * 4.8.3 [Multiple Sections](#multiple-sections) + * 4.8.3.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesections) + * 4.8.3.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesectionsidenticalparameters) + * 4.8.4 [BaseDN Parameter](#basedn-parameter) + * 4.8.4.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingbasedn) + * 4.8.5 [Attribute Parameter](#attribute-parameter) + * 4.8.5.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingattribute) + * 4.8.6 [Scope Parameter](#scope-parameter) + * 4.8.6.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscope) + * 4.8.6.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluebase) + * 4.8.6.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevalueonelevel) + * 4.8.6.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluechildren) + * 4.8.6.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluesubtree) + * 4.8.6.6 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluedefault) + * 4.8.7 [Search Filter Parameter](#search-filter-parameter) + * 4.8.7.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsearchfilter) + * 4.8.8 [Prefix Parameter](#prefix-parameter) + * 4.8.8.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefix) + * 4.8.8.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixdefault) + * 4.8.8.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithutf8characters) + * 4.8.8.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialxmlcharacters) + * 4.8.8.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialregexcharacters) +* 5 [References](#references) + +## Revision History + +This document is stored in an electronic form using [Git] source control management software 
+hosted in a [GitHub Repository]. +All the updates are tracked using the [Revision History]. + +## Introduction + +The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating +users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added +support for authenticating users using an [LDAP] external user directory. + +This requirements specification adds additional functionality for mapping [LDAP] groups to +the corresponding [ClickHouse] [RBAC] roles when [LDAP] external user directory is configured. +This functionality will enable easier access management for [LDAP] authenticated users +as the privileges granted by the roles can be granted or revoked by granting or revoking +a corresponding [LDAP] group to one or more [LDAP] users. + +For the use case when only [LDAP] user authentication is used, the roles can be +managed using [RBAC] in the same way as for non-[LDAP] authenticated users. + +## Terminology + +### LDAP + +* Lightweight Directory Access Protocol + +## Requirements + +### General + +#### RQ.SRS-014.LDAP.RoleMapping +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory. + +#### RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory when +one or more roles are specified in the `<roles>` section. + +#### RQ.SRS-014.LDAP.RoleMapping.Search +version: 1.0 + +[ClickHouse] SHALL perform search on the [LDAP] server and map the results to [RBAC] role names +when authenticating users using the [LDAP] external user directory if the `<role_mapping>` section is configured +as part of the [LDAP] external user directory. The matched roles SHALL be assigned to the user. + +### Mapped Role Names + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that contains UTF-8 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name with more than 128 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in XML. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in regex. + +### Multiple Roles + +#### RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles +version: 1.0 + +[ClickHouse] SHALL support mapping one or more [LDAP] search results for users authenticated using +[LDAP] external user directory to one or more [RBAC] roles. + +### LDAP Groups + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for any users authenticated using [LDAP] external user directory +if the corresponding [LDAP] group(s) that map those role(s) are removed.
Any users that have active sessions SHALL still +have privileges provided by the role(s) until the next time they are authenticated. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] groups are removed and added +at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for the user authenticated using [LDAP] external user directory +if the user has been removed from the corresponding [LDAP] group(s) that map those role(s). +Any active user sessions SHALL have privileges provided by the role(s) until the next time the user is authenticated. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] users are added and removed from [LDAP] groups used to map to [RBAC] roles +at the same time as [LDAP] user authentications are performed in parallel. + +### RBAC Roles + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent +version: 1.0 + +[ClickHouse] SHALL not reject authentication attempt using [LDAP] external user directory if any of the roles that are +are mapped from [LDAP] but are not present locally. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added +version: 1.0 + +[ClickHouse] SHALL add the privileges provided by the [LDAP] mapped role when the +role is not present during user authentication using [LDAP] external user directory +as soon as the role is added. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed +version: 1.0 + +[ClickHouse] SHALL remove the privileges provided by the role from all the +users authenticated using [LDAP] external user directory if the [RBAC] role that was mapped +as a result of [LDAP] search is removed. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded +version: 1.0 + +[ClickHouse] SHALL reassign the [RBAC] role and add all the privileges provided by the role +when it is re-added after removal for all [LDAP] users authenticated using external user directory +for any role that was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [RBAC] roles that are mapped by [LDAP] groups +are added and removed at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New +version: 1.0 + +[ClickHouse] SHALL not allow any new roles to be assigned to any +users authenticated using [LDAP] external user directory unless the role is specified +in the configuration of the external user directory or was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege +version: 1.0 + +[ClickHouse] SHALL add new privilege to all the users authenticated using [LDAP] external user directory +when new privilege is added to one of the roles that were mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege +version: 1.0 + +[ClickHouse] SHALL remove privilege from all the users authenticated using [LDAP] external user directory +when the privilege that was provided by the mapped role is removed from all the roles +that were mapped as a result of [LDAP] search. 
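Taken together, the RBAC Roles requirements above amount to a simple resolution rule at authentication time: grant the roles listed in the `<roles>` section plus those mapped role names that currently exist locally, and skip mapped names that have no matching local role without failing the authentication. A minimal illustrative sketch of that rule follows; the function and argument names are invented for this example and are not ClickHouse internals.

```python
# Hypothetical illustration of the role resolution behaviour described above;
# not ClickHouse code. Mapped role names are granted only if a matching RBAC
# role exists locally, a missing role never rejects the authentication itself,
# and roles from the <roles> section are always granted.
def resolve_effective_roles(mapped_role_names, local_rbac_roles, fixed_roles=()):
    effective = set(fixed_roles)
    for name in mapped_role_names:
        if name in local_rbac_roles:
            effective.add(name)
        # else: skipped for now, picked up on a later authentication once the role is created
    return effective


if __name__ == "__main__":
    print(sorted(resolve_effective_roles(
        mapped_role_names={"clickhouse_admin", "clickhouse_missing"},
        local_rbac_roles={"clickhouse_admin", "readonly"},
        fixed_roles={"readonly"},
    )))  # ['clickhouse_admin', 'readonly']
```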
+ +### Authentication + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using [LDAP] server +when using [LDAP] external user directory that has role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid +version: 1.0 + +[ClickHouse] SHALL support authentication of valid users and +prohibit authentication of invalid users using [LDAP] server +in parallel without having invalid attempts affecting valid authentications +when using [LDAP] external user directory that has role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of external [LDAP] users +authenticated using multiple [LDAP] external user directories that have +role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users defined only locally +when one or more [LDAP] external user directories with role mapping +are specified in the configuration file. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of local and external [LDAP] users +authenticated using multiple [LDAP] external user directories with role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user +authenticated using the same [LDAP] external user directory with role mapping enabled. + +### Server Configuration + +#### BindDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN +version: 1.0 + +[ClickHouse] SHALL support the `<bind_dn>` parameter in the `<ldap_servers>` section +of the `config.xml` that SHALL be used to construct the `DN` to bind to. +The resulting `DN` SHALL be constructed by replacing all `{user_name}` substrings of the template +with the actual user name during each authentication attempt. + +For example, + +```xml +<yandex> +    <ldap_servers> +        <my_ldap_server> +            <!-- ... --> +            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn> +            <!-- ... --> +        </my_ldap_server> +    </ldap_servers> +</yandex> +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN +version: 1.0 + +[ClickHouse] SHALL return an error if both `<bind_dn>` and `<auth_dn_prefix>` or `<auth_dn_suffix>` parameters +are specified as part of [LDAP] server description in the `<ldap_servers>` section of the `config.xml`. + +### External User Directory Configuration + +#### Syntax + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax +version: 1.0 + +[ClickHouse] SHALL support the `role_mapping` sub-section in the `<ldap>` section +of the `config.xml`.
+ +For example, + +```xml +<yandex> +    <user_directories> +        <ldap> +            <!-- ... --> +            <role_mapping> +                <base_dn>ou=groups,dc=example,dc=com</base_dn> +                <attribute>cn</attribute> +                <scope>subtree</scope> +                <search_filter>(&(objectClass=groupOfNames)(member={bind_dn}))</search_filter> +                <prefix>clickhouse_</prefix> +            </role_mapping> +        </ldap> +    </user_directories> +</yandex> +``` + +#### Special Characters Escaping + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping +version: 1.0 + +[ClickHouse] SHALL support properly escaped special XML characters that can be present +as part of the values for different configuration parameters inside the +`<role_mapping>` section of the `config.xml` such as + +* `<search_filter>` parameter +* `<prefix>` parameter + +#### Multiple Sections + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections +version: 1.0 + +[ClickHouse] SHALL support multiple `<role_mapping>` sections defined inside the same `<ldap>` section +of the `config.xml` and all of the `<role_mapping>` sections SHALL be applied. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters +version: 1.0 + +[ClickHouse] SHALL not duplicate mapped roles when multiple `<role_mapping>` sections +with identical parameters are defined inside the `<ldap>` section +of the `config.xml`. + +#### BaseDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN +version: 1.0 + +[ClickHouse] SHALL support the `<base_dn>` parameter in the `<role_mapping>` section +of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search. + +The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of +the template with the actual user name and bind `DN` during each [LDAP] search. + +#### Attribute Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute +version: 1.0 + +[ClickHouse] SHALL support the `<attribute>` parameter in the `<role_mapping>` section of +the `config.xml` that SHALL specify the name of the attribute whose values SHALL be returned by the [LDAP] search. + +#### Scope Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope +version: 1.0 + +[ClickHouse] SHALL support the `<scope>` parameter in the `<role_mapping>` section of +the `config.xml` that SHALL define the scope of the LDAP search as defined +by the https://ldapwiki.com/wiki/LDAP%20Search%20Scopes. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base +version: 1.0 + +[ClickHouse] SHALL support the `base` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/BaseObject. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel +version: 1.0 + +[ClickHouse] SHALL support the `one_level` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SingleLevel. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children +version: 1.0 + +[ClickHouse] SHALL support the `children` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SubordinateSubtree. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree +version: 1.0 + +[ClickHouse] SHALL support the `subtree` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/WholeSubtree.
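The `<base_dn>` template handling described above (and the `<search_filter>` template covered further below) is plain substring substitution of the `{user_name}` and `{bind_dn}` placeholders. A small illustrative sketch under that assumption; the helper name is invented here and does not correspond to ClickHouse code.

```python
# Illustrative sketch of the {user_name}/{bind_dn}/{base_dn} template substitution
# described for <base_dn> above and <search_filter> below; not ClickHouse code.
def substitute_template(template: str, user_name: str, bind_dn: str = "", base_dn: str = "") -> str:
    return (template
            .replace("{user_name}", user_name)
            .replace("{bind_dn}", bind_dn)
            .replace("{base_dn}", base_dn))


if __name__ == "__main__":
    bind_dn = substitute_template("uid={user_name},ou=users,dc=example,dc=com", user_name="user1")
    print(bind_dn)  # uid=user1,ou=users,dc=example,dc=com
    print(substitute_template("(&(objectClass=groupOfNames)(member={bind_dn}))",
                              user_name="user1", bind_dn=bind_dn))
    # (&(objectClass=groupOfNames)(member=uid=user1,ou=users,dc=example,dc=com))
```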
+ +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default +version: 1.0 + +[ClickHouse] SHALL support the `subtree` as the default value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` when the `<scope>` parameter is not specified. + +#### Search Filter Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter +version: 1.0 + +[ClickHouse] SHALL support the `<search_filter>` parameter in the `<role_mapping>` +section of the `config.xml` that SHALL specify the template used to construct +the [LDAP filter](https://ldap.com/ldap-filters/) for the search. + +The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings +of the template with the actual user name, bind `DN`, and base `DN` during each [LDAP] search. + +#### Prefix Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix +version: 1.0 + +[ClickHouse] SHALL support the `<prefix>` parameter in the `<role_mapping>` +section of the `config.xml` that SHALL be expected to be in front of each string in +the original list of strings returned by the [LDAP] search. +Prefix SHALL be removed from the original strings and resulting strings SHALL be treated as [RBAC] role names. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default +version: 1.0 + +[ClickHouse] SHALL support empty string as the default value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support UTF8 characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support XML special characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support regex special characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`.
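The `<prefix>` requirements above describe a strip step over the attribute values returned by the [LDAP] search. A minimal illustrative sketch follows; it assumes values that do not start with the configured prefix are simply ignored (the SRS only states that the prefix is expected in front of each value), and with the default empty prefix every returned value is kept as-is. The helper name is invented for this example.

```python
# Illustrative model of the <prefix> handling described above; not ClickHouse code.
# Assumption: values that do not start with the configured prefix are skipped.
def roles_from_search_results(values, prefix=""):
    roles = []
    for value in values:
        if value.startswith(prefix):
            # Strip the prefix; the remainder is treated as an RBAC role name.
            roles.append(value[len(prefix):])
    return roles


if __name__ == "__main__":
    groups = ["clickhouse_admin", "clickhouse_readonly", "unrelated_group"]
    print(roles_from_search_results(groups, prefix="clickhouse_"))  # ['admin', 'readonly']
    print(roles_from_search_results(groups))  # default empty prefix keeps all values unchanged
```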
+ +## References + +* **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ +* **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +* **ClickHouse:** https://clickhouse.tech +* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Git:** https://git-scm.com/ + +[RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS]: #srs +[Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md +[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +[ClickHouse]: https://clickhouse.tech +[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Git]: https://git-scm.com/ +[GitHub]: https://github.com diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.py b/tests/testflows/ldap/role_mapping/requirements/requirements.py index ca7192e9dad..b2748762e03 100644 --- a/tests/testflows/ldap/role_mapping/requirements/requirements.py +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.py @@ -1,6 +1,6 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.210101.1235930. +# document by TestFlows v1.6.210129.1222545. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. from testflows.core import Specification @@ -814,15 +814,15 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithS level=4, num='4.8.8.5') -QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( - name='QA-SRS014 ClickHouse LDAP Role Mapping', +SRS_014_ClickHouse_LDAP_Role_Mapping = Specification( + name='SRS-014 ClickHouse LDAP Role Mapping', description=None, - author='vzakaznikov', - date='December 4, 2020', - status='-', - approved_by='-', - approved_date='-', - approved_version='-', + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, version=None, group=None, type=None, @@ -950,27 +950,9 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters, ), content=''' -# QA-SRS014 ClickHouse LDAP Role Mapping +# SRS-014 ClickHouse LDAP Role Mapping # Software Requirements Specification -(c) 2020 Altinity LTD. All Rights Reserved. 
- -**Document status:** Confidential - -**Author:** vzakaznikov - -**Date:** December 4, 2020 - -## Approval - -**Status:** - - -**Version:** - - -**Approved by:** - - -**Date:** - - ## Table of Contents * 1 [Revision History](#revision-history) @@ -1046,13 +1028,13 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( ## Revision History This document is stored in an electronic form using [Git] source control management software -hosted in a [GitLab Repository]. +hosted in a [GitHub Repository]. All the updates are tracked using the [Revision History]. ## Introduction -The [QA-SRS007 ClickHouse Authentication of Users via LDAP] added support for authenticating -users using an [LDAP] server and the [QA-SRS009 ClickHouse LDAP External User Directory] added +The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating +users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added support for authenticating users using an [LDAP] external user directory. This requirements specification adds additional functionality for mapping [LDAP] groups to @@ -1457,19 +1439,19 @@ the `` section of the `config.xml`. * **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ * **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol * **ClickHouse:** https://clickhouse.tech -* **GitLab Repository**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md -* **Revision History**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md * **Git:** https://git-scm.com/ [RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/ [SRS]: #srs [Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ -[QA-SRS009 ClickHouse LDAP External User Directory]: https://gitlab.com/altinity-qa/documents/qa-srs009-clickhouse-ldap-external-user-directory/-/blob/master/QA_SRS009_ClickHouse_LDAP_External_User_Directory.md -[QA-SRS007 ClickHouse Authentication of Users via LDAP]: https://gitlab.com/altinity-qa/documents/qa-srs007-clickhouse-athentication-of-users-via-ldap/-/blob/master/QA_SRS007_ClickHouse_Authentication_of_Users_via_LDAP.md +[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md [LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol [ClickHouse]: https://clickhouse.tech -[GitLab Repository]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md -[Revision History]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +[GitHub Repository]: 
https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md [Git]: https://git-scm.com/ -[GitLab]: https://gitlab.com +[GitHub]: https://github.com ''') diff --git a/tests/testflows/ldap/role_mapping/tests/server_config.py b/tests/testflows/ldap/role_mapping/tests/server_config.py index 85fe33f4388..8008d9003d7 100644 --- a/tests/testflows/ldap/role_mapping/tests/server_config.py +++ b/tests/testflows/ldap/role_mapping/tests/server_config.py @@ -65,7 +65,7 @@ def bind_dn_conflict_with_auth_dn(self, timeout=60): } } - invalid_server_config(servers, message=message, tail=18, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestFeature @@ -75,4 +75,4 @@ def feature(self, node="clickhouse1"): """ self.context.node = self.context.cluster.node(node) for scenario in loads(current_module(), Scenario): - scenario() \ No newline at end of file + scenario() diff --git a/tests/testflows/rbac/docker-compose/docker-compose.yml b/tests/testflows/rbac/docker-compose/docker-compose.yml index a3f5144c9ed..29f2ef52470 100755 --- a/tests/testflows/rbac/docker-compose/docker-compose.yml +++ b/tests/testflows/rbac/docker-compose/docker-compose.yml @@ -57,4 +57,4 @@ services: clickhouse3: condition: service_healthy zookeeper: - condition: service_healthy \ No newline at end of file + condition: service_healthy diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index ccdc4cd168c..04dfb56ff08 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -69,21 +69,27 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( case DB::MySQLReplication::WRITE_ROWS_EVENT_V1: case DB::MySQLReplication::WRITE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } case DB::MySQLReplication::DELETE_ROWS_EVENT_V1: case DB::MySQLReplication::DELETE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1: case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 9e2b5fc6fef..f8926a9af2f 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,11 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test + +find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name 
'*.yaml' -or -name '*.yml' \) -type f | + grep -vP $EXCLUDE_DIRS | + xargs yamllint --config-file=$ROOT_PATH/.yamllint # Machine translation to Russian is strictly prohibited find $ROOT_PATH/docs/ru -name '*.md' | @@ -107,7 +111,23 @@ find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' # Check that ya.make files are auto-generated "$ROOT_PATH"/utils/generate-ya-make/generate-ya-make.sh -git status -uno | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh" +# FIXME: apparently sandbox (don't confuse it with docker) cloning sources +# using some ancient git version, <2.8, that contains one bug for submodules +# initialization [1]: +# +# " * A partial rewrite of "git submodule" in the 2.7 timeframe changed +# the way the gitdir: pointer in the submodules point at the real +# repository location to use absolute paths by accident. This has +# been corrected." +# +# [1]: https://github.com/git/git/blob/cf11a67975b057a144618badf16dc4e3d25b9407/Documentation/RelNotes/2.8.3.txt#L33-L36 +# +# Due to which "git status" will report the following error: +# +# fatal: not a git repository: /place/sandbox-data/tasks/0/2/882869720/ClickHouse/.git/modules/contrib/AMQP-CPP +# +# Anyway this check does not requires any submodule traverse, so it is fine to ignore those errors. +git status -uno 2> >(grep "fatal: not a git repository: /place/sandbox-data/tasks/.*/ClickHouse/\\.git/modules/contrib") | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh" # Check that every header file has #pragma once in first line find $ROOT_PATH/{src,programs,utils} -name '*.h' | diff --git a/utils/check-style/check-style-all b/utils/check-style/check-style-all new file mode 100755 index 00000000000..c34224e5469 --- /dev/null +++ b/utils/check-style/check-style-all @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +dir=$(dirname $0) +$dir/check-style -n +$dir/check-typos +$dir/check-whitespaces -n +$dir/check-duplicate-includes.sh +$dir/shellcheck-run.sh diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp index bce1e08077c..0a697937eb6 100644 --- a/utils/convert-month-partitioned-parts/main.cpp +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -97,6 +97,8 @@ void run(String part_path, String date_column, String dest_path) Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable(); WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096); checksums.write(checksums_out); + checksums_in.close(); + checksums_out.close(); Poco::File(new_tmp_part_path).renameTo(new_part_path.toString()); } diff --git a/utils/github/backport.py b/utils/github/backport.py index 576e3b069c2..7fddbbee241 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -62,7 +62,7 @@ class Backport: RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$') RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$') - # pull-requests are sorted by ancestry from the least recent. + # pull-requests are sorted by ancestry from the most recent. for pr in pull_requests: while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']): logging.info("PR #{} is already inside {}. 
Dropping this branch for further PRs".format(pr['number'], branches[-1][0])) diff --git a/utils/github/local.py b/utils/github/local.py index a997721bc76..2ad8d4b8b71 100644 --- a/utils/github/local.py +++ b/utils/github/local.py @@ -6,15 +6,15 @@ import os import re -class RepositoryBase(object): +class RepositoryBase: def __init__(self, repo_path): import git self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path)) - # commit comparator + # comparator of commits def cmp(x, y): - if x == y: + if str(x) == str(y): return 0 if self._repo.is_ancestor(x, y): return -1 diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index fc1cf7c1b67..4ba92864020 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,8 +1,18 @@ +v21.2.4.6-stable 2021-02-20 +v21.2.3.15-stable 2021-02-14 +v21.2.2.8-stable 2021-02-07 +v21.1.5.4-stable 2021-02-20 +v21.1.4.46-stable 2021-02-14 +v21.1.3.32-stable 2021-02-03 v21.1.2.15-stable 2021-01-18 +v20.12.7.3-stable 2021-02-20 +v20.12.6.29-stable 2021-02-14 +v20.12.5.18-stable 2021-02-03 v20.12.5.14-stable 2020-12-28 v20.12.4.5-stable 2020-12-24 v20.12.3.3-stable 2020-12-09 v20.12.2.1-stable 2020-12-09 +v20.11.7.16-stable 2021-02-03 v20.11.6.6-stable 2020-12-24 v20.11.5.18-stable 2020-12-06 v20.11.4.13-stable 2020-11-20 @@ -20,6 +30,7 @@ v20.9.5.5-stable 2020-11-13 v20.9.4.76-stable 2020-10-29 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 +v20.8.13.15-lts 2021-02-20 v20.8.12.2-lts 2021-01-16 v20.8.11.17-lts 2020-12-25 v20.8.10.13-lts 2020-12-24 diff --git a/utils/zookeeper-test/main.cpp b/utils/zookeeper-test/main.cpp index 8f8aac00866..bfd7df26726 100644 --- a/utils/zookeeper-test/main.cpp +++ b/utils/zookeeper-test/main.cpp @@ -127,18 +127,22 @@ void testCreateListWatchEvent(zkutil::ZooKeeper & zk) void testMultiRequest(zkutil::ZooKeeper & zk) { + std::cerr << "Testing multi request\n"; Coordination::Requests requests; requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "aaa", zkutil::CreateMode::Persistent)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1)); zk.multi(requests); + std::cerr << "Multi executed\n"; try { requests.clear(); + std::cerr << "Testing bad multi\n"; requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "qweqwe", zkutil::CreateMode::Persistent)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "ccc", -1)); zk.multi(requests); + std::cerr << "Bad multi executed\n"; std::terminate(); } catch (...) 
@@ -147,6 +151,7 @@ void testMultiRequest(zkutil::ZooKeeper & zk) } checkEq(zk, "/data/multirequest", "bbb"); + std::cerr << "Multi request finished\n"; } std::mutex elements_mutex; diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json new file mode 100644 index 00000000000..1217adbbff5 --- /dev/null +++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json @@ -0,0 +1,55 @@ +[ + { + "system": "Yandex Cloud 8vCPU", + "system_full": "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, 500 GB SSD", + "cpu_vendor": "Intel", + "time": "2021-02-05 00:00:00", + "kind": "cloud", + "result": + [ + [0.004, 0.003, 0.003], + [0.047, 0.030, 0.021], + [0.129, 0.066, 0.067], + [0.873, 0.098, 0.095], + [0.869, 0.247, 0.257], + [1.429, 0.818, 0.768], + [0.055, 0.042, 0.043], + [0.034, 0.025, 0.024], + [1.372, 1.003, 1.051], + [1.605, 1.281, 1.209], + [0.942, 0.503, 0.483], + [0.980, 0.537, 0.558], + [2.076, 1.664, 1.635], + [3.136, 2.235, 2.171], + [2.351, 1.973, 1.974], + [2.369, 2.170, 2.133], + [6.281, 5.576, 5.498], + [3.739, 3.481, 3.354], + [10.947, 10.225, 10.271], + [0.875, 0.111, 0.108], + [10.832, 1.844, 1.877], + [12.344, 2.330, 2.227], + [22.999, 5.000, 4.903], + [20.086, 2.390, 2.278], + [3.036, 0.722, 0.673], + [1.420, 0.602, 0.578], + [3.040, 0.728, 0.714], + [10.842, 1.874, 1.783], + [9.207, 2.809, 2.705], + [2.751, 2.703, 2.714], + [2.810, 1.675, 1.568], + [6.507, 2.449, 2.505], + [15.968, 15.014, 15.318], + [13.479, 7.951, 7.702], + [13.227, 7.791, 7.699], + [2.811, 2.723, 2.549], + [0.358, 0.249, 0.273], + [0.157, 0.099, 0.101], + [0.189, 0.088, 0.080], + [0.758, 0.544, 0.525], + [0.115, 0.033, 0.027], + [0.063, 0.048, 0.023], + [0.014, 0.011, 0.008] + ] + } +] diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json new file mode 100644 index 00000000000..ace2442c86e --- /dev/null +++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json @@ -0,0 +1,55 @@ +[ + { + "system": "Yandex Cloud 8vCPU Object Storage", + "system_full": "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, Object Storage", + "cpu_vendor": "Intel", + "time": "2021-02-05 00:00:00", + "kind": "cloud", + "result": + [ + [0.007, 0.003, 0.003], + [0.214, 0.111, 0.096], + [1.239, 1.359, 0.718], + [3.056, 3.366, 1.869], + [1.946, 1.552, 2.450], + [4.804, 2.307, 2.398], + [0.198, 0.108, 0.114], + [0.141, 0.104, 0.100], + [2.755, 2.749, 3.608], + [3.140, 3.905, 3.830], + [2.353, 4.996, 1.637], + [3.796, 1.536, 1.724], + [3.565, 3.016, 3.381], + [4.962, 4.263, 4.352], + [4.210, 3.974, 4.318], + [3.884, 3.434, 3.124], + [10.451, 9.147, 7.526], + [6.288, 5.882, 7.714], + [15.239, 33.243, 17.968], + [1.645, 1.870, 3.230], + [10.980, 8.984, 7.589], + [14.345, 11.503, 12.449], + [17.687, 17.764, 18.984], + [76.606, 65.179, 94.215], + [5.833, 3.347, 3.127], + [3.815, 2.574, 2.402], + [4.916, 6.169, 5.731], + [7.961, 9.930, 8.555], + [5.995, 7.382, 6.054], + [3.113, 4.176, 3.172], + [5.077, 5.221, 5.709], + [8.990, 9.598, 6.272], + [17.832, 17.668, 17.276], + [11.846, 14.692, 13.225], + [12.544, 12.502, 12.725], + [3.604, 4.811, 3.267], + [0.738, 0.751, 0.862], + [0.718, 0.611, 0.561], + [2.125, 0.688, 0.522], + [1.469, 1.546, 1.373], + [1.382, 1.069, 0.976], + [1.353, 1.212, 1.119], + [0.045, 0.031, 0.041] + ] + } +] diff --git a/website/templates/footer.html 
b/website/templates/footer.html index 765ea63d528..1eaf519b58b 100644 --- a/website/templates/footer.html +++ b/website/templates/footer.html @@ -8,7 +8,7 @@ {{ _('ClickHouse source code is published under the Apache 2.0 License.') }} {{ _('Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.') }}
- © 2016–2020 {{ _('Yandex LLC') }} + © 2016–2021 {{ _('Yandex LLC') }}
diff --git a/website/templates/index/community.html b/website/templates/index/community.html index e65f9ff0f86..20b09e7318b 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -66,7 +66,7 @@
-