diff --git a/.gitmodules b/.gitmodules
index 7a2c5600e65..f9bc8a56a5c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -93,7 +93,7 @@
 url = https://github.com/ClickHouse-Extras/libunwind.git
 [submodule "contrib/simdjson"]
     path = contrib/simdjson
-    url = https://github.com/ClickHouse-Extras/simdjson.git
+    url = https://github.com/simdjson/simdjson.git
 [submodule "contrib/rapidjson"]
     path = contrib/rapidjson
     url = https://github.com/ClickHouse-Extras/rapidjson
diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index 3c895202c09..363f281584e 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -1105,11 +1105,11 @@ public:
     }
 
     template <typename DateOrTime>
-    inline LUTIndex addMonthsIndex(DateOrTime v, Int64 delta) const
+    inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
    {
        const Values & values = lut[toLUTIndex(v)];

-        Int64 month = static_cast<Int64>(values.month) + delta;
+        Int64 month = values.month + delta;

        if (month > 0)
        {
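A note on the `DateLUTImpl` hunk above: dropping the cast lets `values.month + delta` feed the surrounding normalization logic directly, which is why the function is now marked `NO_SANITIZE_UNDEFINED`. A minimal standalone sketch of the month normalization this code relies on; all names below are illustrative, not ClickHouse's:

```cpp
#include <cassert>
#include <cstdint>

// Normalize a 1-based month plus a signed delta into a (year, month) pair.
// This mirrors the "if (month > 0)" branching in addMonthsIndex.
struct YearMonth { int64_t year; int64_t month; };

YearMonth addMonths(int64_t year, int64_t month, int64_t delta)
{
    int64_t m = month + delta;      // may leave 1..12, may go non-positive
    if (m > 0)
    {
        year += (m - 1) / 12;       // carry whole years forward
        m = ((m - 1) % 12) + 1;     // back into 1..12
    }
    else
    {
        year -= (-m) / 12 + 1;      // borrow whole years
        m = 12 - ((-m) % 12);       // back into 1..12
    }
    return {year, m};
}

int main()
{
    assert(addMonths(2021, 11, 3).year == 2022);   // Nov 2021 + 3 = Feb 2022
    assert(addMonths(2021, 11, 3).month == 2);
    assert(addMonths(2021, 1, -1).year == 2020);   // Jan 2021 - 1 = Dec 2020
    assert(addMonths(2021, 1, -1).month == 12);
}
```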
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 3d3683e7775..70468326ad5 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 3d3683e77753cfe015a05fae95ddf418e19f59e1
+Subproject commit 70468326ad5d72e9497944838484c591dae054ea
diff --git a/contrib/replxx b/contrib/replxx
index cdb6e3f2ce4..2b24f14594d 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc
+Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
diff --git a/contrib/simdjson b/contrib/simdjson
index 3190d66a490..95b4870e20b 160000
--- a/contrib/simdjson
+++ b/contrib/simdjson
@@ -1 +1 @@
-Subproject commit 3190d66a49059092a1753dc35595923debfc1698
+Subproject commit 95b4870e20be5f97d9dcf63b23b1c6f520c366c1
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile
index 8443eae691b..d9cd68254b7 100644
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@@ -18,6 +18,7 @@ RUN apt-get update \
         clickhouse-client=$version \
         clickhouse-common-static=$version \
         locales \
+        tzdata \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf \
     && apt-get clean
diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile
index 295784a6184..414eb23d044 100644
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@@ -32,6 +32,7 @@ RUN groupadd -r clickhouse --gid=101 \
         clickhouse-server=$version \
         locales \
         wget \
+        tzdata \
     && rm -rf \
         /var/lib/apt/lists/* \
         /var/cache/debconf \
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 0f9de1996ab..cd192c0c9da 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -21,7 +21,9 @@ RUN addgroup -S -g 101 clickhouse \
    && chown clickhouse:clickhouse /var/lib/clickhouse \
    && chown root:clickhouse /var/log/clickhouse-server \
    && chmod +x /entrypoint.sh \
-    && apk add --no-cache su-exec bash \
+    && apk add --no-cache su-exec bash tzdata \
+    && cp /usr/share/zoneinfo/UTC /etc/localtime \
+    && echo "UTC" > /etc/timezone \
    && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client

# we need to allow "others" access to clickhouse folder, because docker container
diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh
index 0138a165505..81e04bd7874 100755
--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@@ -46,9 +46,11 @@ DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --
 TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
 USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
 LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
-LOG_DIR="$(dirname "$LOG_PATH" || true)"
+LOG_DIR=""
+if [ -n "$LOG_PATH" ]; then LOG_DIR="$(dirname "$LOG_PATH")"; fi
 ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
-ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
+ERROR_LOG_DIR=""
+if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")"; fi
 FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
 CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 649f9f812e1..bbd5443ffb6 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -292,6 +292,7 @@ function run_tests
            01318_decrypt # Depends on OpenSSL
            01663_aes_msan # Depends on OpenSSL
            01667_aes_args_check # Depends on OpenSSL
+            01776_decrypt_aead_size_check # Depends on OpenSSL
            01281_unsucceeded_insert_select_queries_counter
            01292_create_user
            01294_lazy_database_concurrent
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index 3ddaf99b879..4727f485943 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -266,14 +266,13 @@ for query_index in queries_to_run:
        try:
            # Will also detect too long queries during warmup stage
-            res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
+            res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
        except clickhouse_driver.errors.Error as e:
            # Add query id to the exception to make debugging easier.
            e.args = (prewarm_id, *e.args)
            e.message = prewarm_id + ': ' + e.message
            raise

-        print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
    except KeyboardInterrupt:
        raise
@@ -320,7 +319,7 @@ for query_index in queries_to_run:
    for conn_index, c in enumerate(this_query_connections):
        try:
-            res = c.execute(q, query_id = run_id)
+            res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
        except clickhouse_driver.errors.Error as e:
            # Add query id to the exception to make debugging easier.
            e.args = (run_id, *e.args)
diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile
index 6bcdc3df5cd..253ca1b729a 100644
--- a/docker/test/sqlancer/Dockerfile
+++ b/docker/test/sqlancer/Dockerfile
@@ -2,7 +2,6 @@ FROM ubuntu:20.04

 RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends
-
 RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
 RUN mkdir /sqlancer && \
    cd /sqlancer && \
diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md
index 1a2ccf3e0dc..8326038407f 100644
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@@ -3,7 +3,7 @@ toc_priority: 8
 toc_title: PostgreSQL
 ---

-# PosgtreSQL {#postgresql}
+# PostgreSQL {#postgresql}

 The PostgreSQL engine allows you to perform `SELECT` queries on data that is stored on a remote PostgreSQL server.
diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md
index 7038cc539d2..3fa026c794a 100644
--- a/docs/en/faq/integration/json-import.md
+++ b/docs/en/faq/integration/json-import.md
@@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
 Using [CLI interface](../../interfaces/cli.md):

 ``` bash
-$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
+$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
 ```

 Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead.
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index ee2235b7861..5987ba0f676 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -50,7 +50,7 @@ The supported formats are:
 | [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [Arrow](#data-format-arrow) | ✔ | ✔ |
 | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
@@ -1284,32 +1284,33 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e

 ## ORC {#data-format-orc}

-[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
+[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the [Hadoop](https://hadoop.apache.org/) ecosystem.

 ### Data Types Matching {#data_types-matching-3}

-The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` queries.
+The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

-| ORC data type (`INSERT`) | ClickHouse data type |
-|--------------------------|-----------------------------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
-| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
-| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
-| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
-| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
-| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
-| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
-| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
-| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
-| `DATE32` | [Date](../sql-reference/data-types/date.md) |
-| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
-| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
-| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
+| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
+|--------------------------|-----------------------------------------------------|--------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
+| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
+| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
+| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
+| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |

 ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

-Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

 The data types of ClickHouse table columns don’t have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
@@ -1321,6 +1322,14 @@ You can insert ORC data from a file into ClickHouse table by the following comma
 $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
 ```

+### Selecting Data {#selecting-data-2}
+
+You can select data from a ClickHouse table and save it to a file in the ORC format with the following command:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
+```
+
 To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md).

 ## LineAsString {#lineasstring}
diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md
index 72a537f15b9..583cce88ca4 100644
--- a/docs/en/operations/system-tables/errors.md
+++ b/docs/en/operations/system-tables/errors.md
@@ -9,7 +9,7 @@ Columns:
 - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened.
 - `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
 - `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
-- `last_error_stacktrace` ([String](../../sql-reference/data-types/string.md)) — stacktrace for the last error.
+- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
 - `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query).

 **Example**
@@ -25,3 +25,12 @@ LIMIT 1
 │ CANNOT_OPEN_FILE │   76 │     1 │
 └──────────────────┴──────┴───────┘
 ```
+
+``` sql
+WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all
+SELECT name, arrayStringConcat(all, '\n') AS res
+FROM system.errors
+LIMIT 1
+SETTINGS allow_introspection_functions=1\G
+```
+
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index 337586a2e10..de6a780235f 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -320,8 +320,6 @@ Similar to `cache`, but stores data on SSD and index in RAM.
     <read_buffer_size>1048576</read_buffer_size>
     <path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-
-    <max_stored_keys>1048576</max_stored_keys>
 </ssd_cache>
 ```
@@ -329,8 +327,8 @@ Similar to `cache`, but stores data on SSD and index in RAM.

 or

 ``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
 ```

 ### complex_key_ssd_cache {#complex-key-ssd-cache}
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index cbf03a44d46..a646347ea60 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -23,7 +23,9 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `GROUPS` frame | not supported |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
 | `rank()`, `dense_rank()`, `row_number()` | supported |
-| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| `lag/lead(value, offset)` | Not supported. Workarounds: |
+| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |

 ## References
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 67cc80f5cd8..f67997b58d6 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -49,7 +49,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
 | [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [Arrow](#data-format-arrow) | ✔ | ✔ |
 | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
@@ -1203,45 +1203,53 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_

 ## ORC {#data-format-orc}

-[Apache ORC](https://orc.apache.org/) - это column-oriented формат данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse.
+[Apache ORC](https://orc.apache.org/) — это столбцовый формат данных, распространенный в экосистеме [Hadoop](https://hadoop.apache.org/).

 ### Соответствие типов данных {#sootvetstvie-tipov-dannykh-1}

-Таблица показывает поддержанные типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT`.
+Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT` и `SELECT`.

-| Тип данных ORC (`INSERT`) | Тип данных ClickHouse |
-|---------------------------|-----------------------------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
-| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
-| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
-| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
-| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
-| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
-| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
-| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
-| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
-| `DATE32` | [Date](../sql-reference/data-types/date.md) |
-| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
-| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
-| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
+| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | Тип данных ORC (`SELECT`) |
+|---------------------------|-----------------------------------------------------|---------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
+| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
+| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
+| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
+| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |

-ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
+ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.

-Неподдержанные типы данных ORC: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

-Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных, ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.
+Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.

 ### Вставка данных {#vstavka-dannykh-1}

-Данные ORC можно вставить в таблицу ClickHouse командой:
+Чтобы вставить в ClickHouse данные из файла в формате ORC, используйте команду следующего вида:

 ``` bash
 $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
 ```

-Для обмена данных с Hadoop можно использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).
+### Вывод данных {#vyvod-dannykh-1}
+
+Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
+```
+
+Для обмена данных с экосистемой Hadoop вы можете использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).

 ## LineAsString {#lineasstring}
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index 1d1e46250e2..285982565c2 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -318,8 +318,6 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
     <read_buffer_size>1048576</read_buffer_size>
     <path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-
-    <max_stored_keys>1048576</max_stored_keys>
 </ssd_cache>
 ```
@@ -327,8 +325,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))

 или

 ``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
 ```

 ### complex_key_ssd_cache {#complex-key-ssd-cache}
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 54537b7735d..f9b3e5c3e68 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -672,7 +672,7 @@ neighbor(column, offset[, default_value])

 Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных.
 Порядок строк, используемый при вычислении функции `neighbor`, может отличаться от порядка строк, возвращаемых пользователю.
-Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию изне подзапроса.
+Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию извне подзапроса.

 **Аргументы**
diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt
index 470bc5e8719..9605525edbf 100644
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@@ -26,7 +26,7 @@ numpy==1.19.2
 Pygments==2.5.2
 pymdown-extensions==8.0
 python-slugify==4.0.1
-PyYAML==5.3.1
+PyYAML==5.4.1
 repackage==0.7.3
 requests==2.24.0
 singledispatch==3.4.0.3
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index 80d44a336a5..939a48d949f 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -8,10 +8,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/src/Client/ya.make b/src/Client/ya.make
index af1dd05f1d4..4201203a8e9 100644
--- a/src/Client/ya.make
+++ b/src/Client/ya.make
@@ -16,7 +16,6 @@ SRCS(
    HedgedConnections.cpp
    HedgedConnectionsFactory.cpp
    MultiplexedConnections.cpp
-    TimeoutSetter.cpp
 )
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 586c0fbde4d..918bc301754 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -560,7 +560,7 @@ namespace DB
 {
 namespace ErrorCodes
 {
-#define M(VALUE, NAME) extern const Value NAME = VALUE;
+#define M(VALUE, NAME) extern const ErrorCode NAME = VALUE;
     APPLY_FOR_ERROR_CODES(M)
 #undef M

@@ -587,7 +587,7 @@ namespace ErrorCodes

    ErrorCode end() { return END + 1; }

-    void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace)
+    void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace)
    {
        if (error_code >= end())
        {
@@ -596,10 +596,10 @@
            error_code = end() - 1;
        }

-        values[error_code].increment(remote, message, stacktrace);
+        values[error_code].increment(remote, message, trace);
    }

-    void ErrorPairHolder::increment(bool remote, const std::string & message, const std::string & stacktrace)
+    void ErrorPairHolder::increment(bool remote, const std::string & message, const FramePointers & trace)
    {
        const auto now = std::chrono::system_clock::now();

@@ -609,7 +609,7 @@
        ++error.count;
        error.message = message;
-        error.stacktrace = stacktrace;
+        error.trace = trace;
        error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
    }

    ErrorPair ErrorPairHolder::get()
diff --git a/src/Common/ErrorCodes.h b/src/Common/ErrorCodes.h
index edb9be9e0c0..ffd0b8b8619 100644
--- a/src/Common/ErrorCodes.h
+++ b/src/Common/ErrorCodes.h
@@ -1,11 +1,12 @@
 #pragma once

-#include
+#include
 #include
 #include
 #include
-#include
 #include
+#include
+#include

/** Allows to count number of simultaneously happening error codes.
  * See also Exception.cpp for incrementing part.
@@ -19,6 +20,7 @@ namespace ErrorCodes
    /// ErrorCode identifier (index in array).
    using ErrorCode = int;
    using Value = size_t;
+    using FramePointers = std::vector<void *>;

    /// Get name of error_code by identifier.
    /// Returns statically allocated string.
@@ -33,7 +35,7 @@
        /// Message for the last error.
        std::string message;
        /// Stacktrace for the last error.
-        std::string stacktrace;
+        FramePointers trace;
    };

    struct ErrorPair
    {
@@ -46,7 +48,7 @@
    {
    public:
        ErrorPair get();
-        void increment(bool remote, const std::string & message, const std::string & stacktrace);
+        void increment(bool remote, const std::string & message, const FramePointers & trace);

    private:
        ErrorPair value;
@@ -60,7 +62,7 @@
    ErrorCode end();

    /// Add value for specified error_code.
-    void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace);
+    void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace);
 }

 }
diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp
index 08afd0397f5..e8a98021588 100644
--- a/src/Common/Exception.cpp
+++ b/src/Common/Exception.cpp
@@ -36,7 +36,7 @@ namespace ErrorCodes
 /// - Aborts the process if error code is LOGICAL_ERROR.
 /// - Increments error codes statistics.

-void handle_error_code([[maybe_unused]] const std::string & msg, const std::string & stacktrace, int code, bool remote)
+void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
 {
    // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
    // Log the message before we fail.
@@ -47,20 +47,21 @@
        abort();
    }
 #endif
-    ErrorCodes::increment(code, remote, msg, stacktrace);
+
+    ErrorCodes::increment(code, remote, msg, trace);
 }

 Exception::Exception(const std::string & msg, int code, bool remote_)
    : Poco::Exception(msg, code)
    , remote(remote_)
 {
-    handle_error_code(msg, getStackTraceString(), code, remote);
+    handle_error_code(msg, code, remote, getStackFramePointers());
 }

 Exception::Exception(const std::string & msg, const Exception & nested, int code)
    : Poco::Exception(msg, nested, code)
 {
-    handle_error_code(msg, getStackTraceString(), code, remote);
+    handle_error_code(msg, code, remote, getStackFramePointers());
 }

 Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
@@ -101,6 +102,31 @@ std::string Exception::getStackTraceString() const
 #endif
 }

+Exception::FramePointers Exception::getStackFramePointers() const
+{
+    FramePointers frame_pointers;
+#ifdef STD_EXCEPTION_HAS_STACK_TRACE
+    {
+        frame_pointers.resize(get_stack_trace_size());
+        for (size_t i = 0; i < frame_pointers.size(); ++i)
+        {
+            frame_pointers[i] = get_stack_trace_frames()[i];
+        }
+    }
+#else
+    {
+        size_t stack_trace_size = trace.getSize();
+        size_t stack_trace_offset = trace.getOffset();
+        frame_pointers.reserve(stack_trace_size - stack_trace_offset);
+        for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
+        {
+            frame_pointers.push_back(trace.getFramePointers()[i]);
+        }
+    }
+#endif
+    return frame_pointers;
+}
+

 void throwFromErrno(const std::string & s, int code, int the_errno)
 {
diff --git a/src/Common/Exception.h b/src/Common/Exception.h
index e487badafa5..79b4394948a 100644
--- a/src/Common/Exception.h
+++ b/src/Common/Exception.h
@@ -24,6 +24,8 @@ namespace DB
 class Exception : public Poco::Exception
 {
 public:
+    using FramePointers = std::vector<void *>;
+
    Exception() = default;
    Exception(const std::string & msg, int code, bool remote_ = false);
    Exception(const std::string & msg, const Exception & nested, int code);
@@ -66,6 +68,8 @@ public:
    bool isRemoteException() const { return remote; }

    std::string getStackTraceString() const;
+    /// Used for system.errors
+    FramePointers getStackFramePointers() const;

 private:
 #ifndef STD_EXCEPTION_HAS_STACK_TRACE
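The `Exception`/`ErrorCodes` hunks above stop formatting a stack trace string eagerly and instead store raw frame pointers (`std::vector<void *>`), leaving symbolization to query time (`demangle(addressToSymbol(...))` in the `system.errors` docs hunk). A portable sketch of the same capture-now, symbolize-later idea using POSIX `backtrace` rather than ClickHouse's own `StackTrace` class:

```cpp
#include <execinfo.h>   // POSIX backtrace / backtrace_symbols (glibc)
#include <cstdio>
#include <cstdlib>
#include <vector>

using FramePointers = std::vector<void *>;

// Capture raw return addresses; cheap compared to formatting a string.
FramePointers captureFrames()
{
    void * buf[64];
    int n = backtrace(buf, 64);
    return FramePointers(buf, buf + n);
}

// Symbolize lazily, only when somebody actually inspects the trace.
void printFrames(const FramePointers & frames)
{
    char ** symbols = backtrace_symbols(frames.data(), static_cast<int>(frames.size()));
    if (!symbols)
        return;
    for (size_t i = 0; i < frames.size(); ++i)
        std::printf("%zu: %s\n", i, symbols[i]);
    std::free(symbols);
}

int main()
{
    printFrames(captureFrames());
}
```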
diff --git a/src/Common/HashTable/LRUHashMap.h b/src/Common/HashTable/LRUHashMap.h
index df9766c5ee8..870fb219523 100644
--- a/src/Common/HashTable/LRUHashMap.h
+++ b/src/Common/HashTable/LRUHashMap.h
@@ -271,13 +271,13 @@ private:
 };

 template <typename Key, typename Mapped>
-struct DefaultCellDisposer
+struct DefaultLRUHashMapCellDisposer
 {
    void operator()(const Key &, const Mapped &) const {}
 };

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
 using LRUHashMap = LRUHashMapImpl<Key, Value, Disposer, Hash, false>;

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
 using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Disposer, Hash, true>;
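The rename above only gives the disposer a less collision-prone name; its job is unchanged: it is invoked for each (key, mapped) cell the LRU policy evicts. A self-contained sketch of that concept (this is an illustration, not ClickHouse's `LRUHashMapImpl`; `TinyLRU` and `NoopDisposer` are made up here):

```cpp
#include <cstdio>
#include <list>
#include <string>
#include <unordered_map>

// Stand-in for DefaultLRUHashMapCellDisposer: do nothing on eviction.
template <typename Key, typename Mapped>
struct NoopDisposer
{
    void operator()(const Key &, const Mapped &) const {}
};

// Toy LRU map; re-insertion of existing keys is ignored for brevity.
template <typename Key, typename Mapped, typename Disposer = NoopDisposer<Key, Mapped>>
class TinyLRU
{
public:
    explicit TinyLRU(size_t max_size_, Disposer disposer_ = {}) : max_size(max_size_), disposer(disposer_) {}

    void insert(const Key & key, const Mapped & value)
    {
        order.push_back(key);
        map[key] = value;
        if (map.size() > max_size)
        {
            Key oldest = order.front();
            order.pop_front();
            disposer(oldest, map[oldest]);  // let the owner release resources
            map.erase(oldest);
        }
    }

private:
    size_t max_size;
    Disposer disposer;
    std::list<Key> order;
    std::unordered_map<Key, Mapped> map;
};

int main()
{
    auto log_eviction = [](const int & k, const std::string & v)
    { std::printf("evicted %d -> %s\n", k, v.c_str()); };

    TinyLRU<int, std::string, decltype(log_eviction)> cache(2, log_eviction);
    cache.insert(1, "one");
    cache.insert(2, "two");
    cache.insert(3, "three");  // evicts key 1, disposer fires
}
```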
diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h
index 163a6503d2e..57ad3d46177 100644
--- a/src/Common/PODArray.h
+++ b/src/Common/PODArray.h
@@ -692,6 +692,30 @@ public:
        assign(from.begin(), from.end());
    }

+    void erase(const_iterator first, const_iterator last)
+    {
+        iterator first_no_const = const_cast<iterator>(first);
+        iterator last_no_const = const_cast<iterator>(last);
+
+        size_t items_to_move = end() - last;
+
+        while (items_to_move != 0)
+        {
+            *first_no_const = *last_no_const;
+
+            ++first_no_const;
+            ++last_no_const;
+
+            --items_to_move;
+        }
+
+        this->c_end = reinterpret_cast<char *>(first_no_const);
+    }
+
+    void erase(const_iterator pos)
+    {
+        this->erase(pos, pos + 1);
+    }

    bool operator== (const PODArray & rhs) const
    {
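The new `PODArray::erase` above is the classic shift-left erase: copy the tail over the erased range, then truncate. Because `PODArray` holds POD elements, no destructors run, so a plain copy loop suffices. The same semantics expressed on `std::vector` (a sketch, not the PR's code):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Erase [first, last): shift the tail left, then drop the unused suffix.
template <typename T>
void podStyleErase(std::vector<T> & v,
                   typename std::vector<T>::const_iterator first,
                   typename std::vector<T>::const_iterator last)
{
    auto dst = v.begin() + (first - v.cbegin());
    auto src = v.begin() + (last - v.cbegin());
    std::copy(src, v.end(), dst);            // forward copy is safe: dst precedes src
    v.resize(v.size() - (last - first));     // truncate, like resetting c_end
}

int main()
{
    std::vector<int> v{0, 1, 2, 3, 4, 5};
    podStyleErase<int>(v, v.cbegin() + 2, v.cbegin() + 4);  // erase {2, 3}
    assert((v == std::vector<int>{0, 1, 4, 5}));
}
```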
diff --git a/src/Common/tests/gtest_pod_array.cpp b/src/Common/tests/gtest_pod_array.cpp
index 53b3e207a22..63cf7026757 100644
--- a/src/Common/tests/gtest_pod_array.cpp
+++ b/src/Common/tests/gtest_pod_array.cpp
@@ -92,3 +92,57 @@ TEST(Common, PODInsertElementSizeNotMultipleOfLeftPadding)

    EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size);
 }
+
+TEST(Common, PODErase)
+{
+    {
+        PaddedPODArray<UInt64> items {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+        expected = {0,1,2,3,4,5,6,7,8,9};
+
+        items.erase(items.begin(), items.begin());
+        EXPECT_EQ(items, expected);
+
+        items.erase(items.end(), items.end());
+        EXPECT_EQ(items, expected);
+    }
+    {
+        PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+
+        expected = {0,1,4,5,6,7,8,9};
+        actual.erase(actual.begin() + 2, actual.begin() + 4);
+        EXPECT_EQ(actual, expected);
+
+        expected = {0,1,4};
+        actual.erase(actual.begin() + 3, actual.end());
+        EXPECT_EQ(actual, expected);
+
+        expected = {};
+        actual.erase(actual.begin(), actual.end());
+        EXPECT_EQ(actual, expected);
+
+        for (size_t i = 0; i < 10; ++i)
+            actual.emplace_back(static_cast<UInt64>(i));
+
+        expected = {0,1,4,5,6,7,8,9};
+        actual.erase(actual.begin() + 2, actual.begin() + 4);
+        EXPECT_EQ(actual, expected);
+
+        expected = {0,1,4};
+        actual.erase(actual.begin() + 3, actual.end());
+        EXPECT_EQ(actual, expected);
+
+        expected = {};
+        actual.erase(actual.begin(), actual.end());
+        EXPECT_EQ(actual, expected);
+    }
+    {
+        PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+
+        expected = {1,2,3,4,5,6,7,8,9};
+        actual.erase(actual.begin());
+        EXPECT_EQ(actual, expected);
+    }
+}
diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp
index 4b4d33954a9..0548de07859 100644
--- a/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/src/Compression/CachedCompressedReadBuffer.cpp
@@ -51,7 +51,7 @@ bool CachedCompressedReadBuffer::nextImpl()
        {
            owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
            owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
-            decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
+            decompressTo(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
        }
diff --git a/src/Compression/CompressedReadBuffer.cpp b/src/Compression/CompressedReadBuffer.cpp
index 6a082164231..78241ec1b69 100644
--- a/src/Compression/CompressedReadBuffer.cpp
+++ b/src/Compression/CompressedReadBuffer.cpp
@@ -21,7 +21,7 @@ bool CompressedReadBuffer::nextImpl()
    memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
    working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-    decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+    decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

    return true;
 }
@@ -48,7 +48,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)
        /// If the decompressed block fits entirely where it needs to be copied.
        if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
        {
-            decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+            decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
            bytes_read += size_decompressed;
            bytes += size_decompressed;
        }
@@ -61,9 +61,9 @@
            memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
            working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-            pos = working_buffer.begin();

-            decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+            decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

+            pos = working_buffer.begin();
            bytes_read += read(to + bytes_read, n - bytes_read);
            break;
diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp
index 8f5b779e4bc..79757d6f151 100644
--- a/src/Compression/CompressedReadBufferBase.cpp
+++ b/src/Compression/CompressedReadBufferBase.cpp
@@ -184,7 +184,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
 }

-void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs)
 {
    ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks);
    ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed);
@@ -210,11 +210,38 @@
                ErrorCodes::CANNOT_DECOMPRESS);
        }
    }
+}
+
+void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+    readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);

    codec->decompress(compressed_buffer, size_compressed_without_checksum, to);
 }

+void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+    readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
+
+    if (codec->isNone())
+    {
+        /// Shortcut for NONE codec to avoid extra memcpy.
+        /// We do it by changing the buffer `to` to point to the existing uncompressed data.
+
+        UInt8 header_size = ICompressionCodec::getHeaderSize();
+        if (size_compressed_without_checksum < header_size)
+            throw Exception(ErrorCodes::CORRUPTED_DATA,
+                "Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})",
+                size_compressed_without_checksum, static_cast<size_t>(header_size));
+
+        to = BufferBase::Buffer(compressed_buffer + header_size, compressed_buffer + size_compressed_without_checksum);
+    }
+    else
+        codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin());
+}
+
+
 /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
 CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_)
    : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_)
diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h
index 60b8847f639..c1e928039ef 100644
--- a/src/Compression/CompressedReadBufferBase.h
+++ b/src/Compression/CompressedReadBufferBase.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include

 namespace DB
@@ -37,7 +38,12 @@ protected:
    /// Returns number of compressed bytes read.
    size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);

-    void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+    /// Decompress into memory pointed by `to`
+    void decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+
+    /// This method can change location of `to` to avoid unnecessary copy if data is uncompressed.
+    /// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location.
+    void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum);

 public:
    /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
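The split into `decompressTo` (copy-out, fixed destination) and `decompress` (may repoint the buffer) exists so the NONE codec can skip a memcpy entirely: the caller's buffer is aliased into the already-read frame, just past the codec header. A standalone sketch of that repointing; the 9-byte header (1 method byte + two UInt32 sizes) matches the ClickHouse frame layout, everything else is illustrative:

```cpp
#include <cassert>
#include <cstring>
#include <vector>

struct Buffer { char * begin; char * end; };   // stand-in for BufferBase::Buffer

constexpr size_t header_size = 9;              // method byte + compressed + decompressed sizes

// NONE-codec shortcut: alias the frame's payload instead of copying it out.
void decompressNone(Buffer & to, char * frame, size_t frame_size)
{
    assert(frame_size >= header_size);         // the real code throws CORRUPTED_DATA here
    to.begin = frame + header_size;
    to.end = frame + frame_size;
}

int main()
{
    std::vector<char> frame(header_size + 5);
    std::memcpy(frame.data() + header_size, "hello", 5);

    Buffer out{nullptr, nullptr};
    decompressNone(out, frame.data(), frame.size());
    assert(out.end - out.begin == 5 && std::memcmp(out.begin, "hello", 5) == 0);
}
```

This is also why `readBig` now sets `pos = working_buffer.begin()` only after calling `decompress`: the buffer may point somewhere new.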
diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp
index 54f360f417b..3a75ea14166 100644
--- a/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/src/Compression/CompressedReadBufferFromFile.cpp
@@ -31,7 +31,7 @@ bool CompressedReadBufferFromFile::nextImpl()
    memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
    working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-    decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+    decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

    return true;
 }
@@ -108,7 +108,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
        /// If the decompressed block fits entirely where it needs to be copied.
        if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
        {
-            decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+            decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
            bytes_read += size_decompressed;
            bytes += size_decompressed;
        }
@@ -122,9 +122,9 @@
            memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
            working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-            pos = working_buffer.begin();

-            decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+            decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

+            pos = working_buffer.begin();
            bytes_read += read(to + bytes_read, n - bytes_read);
            break;
diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp
index dec2b633046..46a12e50828 100644
--- a/src/Compression/ICompressionCodec.cpp
+++ b/src/Compression/ICompressionCodec.cpp
@@ -98,7 +98,7 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch
    UInt8 header_size = getHeaderSize();

    if (source_size < header_size)
-        throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}), this should include header size) is less than the header size ({})", source_size, size_t(header_size));
+        throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})", source_size, static_cast<size_t>(header_size));

    uint8_t our_method = getMethodByte();
    uint8_t method = source[0];
diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
index dcfb13c359e..45eb1348ac6 100644
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@@ -31,6 +31,8 @@ struct Settings;
    M(UInt64, rotate_log_storage_interval, 10000, "How many records will be stored in one log storage file", 0) \
    M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
    M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
+    M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
+    M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consensus with similar speed", 0) \
    M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)

 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index edda26613dd..7e6c10ca125 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -30,6 +30,8 @@ NuKeeperServer::NuKeeperServer(
    , state_manager(nuraft::cs_new(server_id, "test_keeper_server", config, coordination_settings))
    , responses_queue(responses_queue_)
 {
+    if (coordination_settings->quorum_reads)
+        LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Quorum reads enabled, NuKeeper will work slower.");
 }

 void NuKeeperServer::startup()
@@ -59,6 +61,7 @@
    params.reserved_log_items_ = coordination_settings->reserved_log_items;
    params.snapshot_distance_ = coordination_settings->snapshot_distance;
    params.stale_log_gap_ = coordination_settings->stale_log_gap;
+    params.fresh_log_gap_ = coordination_settings->fresh_log_gap;
    params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
    params.auto_forwarding_ = coordination_settings->auto_forwarding;
    params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
@@ -106,7 +109,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord

 void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
 {
    auto [session_id, request] = request_for_session;
-    if (isLeaderAlive() && request->isReadRequest())
+    if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
    {
        state_machine->processReadRequest(request_for_session);
    }
@@ -185,6 +188,9 @@
    if (next_index < last_commited || next_index - last_commited <= 1)
        commited_store = true;

+    if (initialized_flag)
+        return nuraft::cb_func::ReturnCode::Ok;
+
    auto set_initialized = [this] ()
    {
        std::unique_lock lock(initialized_mutex);
@@ -196,10 +202,27 @@
    {
        case nuraft::cb_func::BecomeLeader:
        {
-            if (commited_store) /// We become leader and store is empty, ready to serve requests
+            /// We become leader and store is empty or we already committed it
+            if (commited_store || initial_batch_committed)
                set_initialized();
            return nuraft::cb_func::ReturnCode::Ok;
        }
+        case nuraft::cb_func::BecomeFollower:
+        case nuraft::cb_func::GotAppendEntryReqFromLeader:
+        {
+            if (isLeaderAlive())
+            {
+                auto leader_index = raft_instance->get_leader_committed_log_idx();
+                auto our_index = raft_instance->get_committed_log_idx();
+                /// This may happen when we start RAFT cluster from scratch.
+                /// Node first became leader, and after that some other node became leader.
+                /// BecameFresh for this node will not be called because it was already fresh
+                /// when it was leader.
+                if (leader_index < our_index + coordination_settings->fresh_log_gap)
+                    set_initialized();
+            }
+            return nuraft::cb_func::ReturnCode::Ok;
+        }
        case nuraft::cb_func::BecomeFresh:
        {
            set_initialized(); /// We are fresh follower, ready to serve requests.
@@ -209,6 +232,7 @@
        {
            if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests.
                set_initialized();
+            initial_batch_committed = true;
            return nuraft::cb_func::ReturnCode::Ok;
        }
        default: /// ignore other events
@@ -220,7 +244,7 @@ void NuKeeperServer::waitInit()
 {
    std::unique_lock lock(initialized_mutex);
    int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
-    if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag; }))
+    if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); }))
        throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
 }
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 17099045640..b5c13e62212 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -31,8 +31,9 @@ private:
    ResponsesQueue & responses_queue;

    std::mutex initialized_mutex;
-    bool initialized_flag = false;
+    std::atomic<bool> initialized_flag = false;
    std::condition_variable initialized_cv;
+    std::atomic<bool> initial_batch_committed = false;

    nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
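`initialized_flag` becomes `std::atomic<bool>` because `callbackFunc` now checks it lock-free on its hot path (the early return at the top), while `waitInit` still pairs the condition variable with the mutex and calls `.load()` in the predicate. A minimal sketch of that handshake pattern; names are illustrative, not NuKeeperServer's:

```cpp
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
std::atomic<bool> initialized{false};

void onRaftEvent()  // hot path: lock-free early exit once initialized
{
    if (initialized)
        return;
    {
        std::lock_guard<std::mutex> lock(m);
        initialized = true;   // set under the mutex so waiters can't miss it
    }
    cv.notify_all();
}

bool waitInit(std::chrono::milliseconds timeout)
{
    std::unique_lock<std::mutex> lock(m);
    return cv.wait_for(lock, timeout, [] { return initialized.load(); });
}

int main()
{
    std::thread t(onRaftEvent);
    bool ok = waitInit(std::chrono::milliseconds(1000));
    t.join();
    return ok ? 0 : 1;
}
```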
diff --git a/src/Coordination/NuKeeperSnapshotManager.cpp b/src/Coordination/NuKeeperSnapshotManager.cpp
index f5a97619976..1caa1ea94b8 100644
--- a/src/Coordination/NuKeeperSnapshotManager.cpp
+++ b/src/Coordination/NuKeeperSnapshotManager.cpp
@@ -241,9 +241,10 @@ NuKeeperStorageSnapshot::~NuKeeperStorageSnapshot()
        storage->disableSnapshotMode();
 }

-NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_)
+NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_)
    : snapshots_path(snapshots_path_)
    , snapshots_to_keep(snapshots_to_keep_)
+    , storage_tick_time(storage_tick_time_)
 {
    namespace fs = std::filesystem;

@@ -325,22 +326,24 @@ nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::serializeSnapshotToBuffer(c
    return writer.getBuffer();
 }

-SnapshotMetadataPtr NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer)
+SnapshotMetaAndStorage NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
 {
    ReadBufferFromNuraftBuffer reader(buffer);
    CompressedReadBuffer compressed_reader(reader);
-    return NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
+    auto storage = std::make_unique<NuKeeperStorage>(storage_tick_time);
+    auto snapshot_metadata = NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
+    return std::make_pair(snapshot_metadata, std::move(storage));
 }

-SnapshotMetadataPtr NuKeeperSnapshotManager::restoreFromLatestSnapshot(NuKeeperStorage * storage)
+SnapshotMetaAndStorage NuKeeperSnapshotManager::restoreFromLatestSnapshot()
 {
    if (existing_snapshots.empty())
-        return nullptr;
+        return {};

    auto buffer = deserializeLatestSnapshotBufferFromDisk();
    if (!buffer)
-        return nullptr;
-    return deserializeSnapshotFromBuffer(storage, buffer);
+        return {};
+    return deserializeSnapshotFromBuffer(buffer);
 }

 void NuKeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
diff --git a/src/Coordination/NuKeeperSnapshotManager.h b/src/Coordination/NuKeeperSnapshotManager.h
index 422baf11a65..d844a52eaf4 100644
--- a/src/Coordination/NuKeeperSnapshotManager.h
+++ b/src/Coordination/NuKeeperSnapshotManager.h
@@ -40,17 +40,20 @@ public:
 using NuKeeperStorageSnapshotPtr = std::shared_ptr<NuKeeperStorageSnapshot>;
 using CreateSnapshotCallback = std::function<void(NuKeeperStorageSnapshotPtr &&)>;

+
+using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, NuKeeperStoragePtr>;
+
 class NuKeeperSnapshotManager
 {
 public:
-    NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_);
+    NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_ = 500);

-    SnapshotMetadataPtr restoreFromLatestSnapshot(NuKeeperStorage * storage);
+    SnapshotMetaAndStorage restoreFromLatestSnapshot();

    static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot);
    std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx);

-    static SnapshotMetadataPtr deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer);
+    SnapshotMetaAndStorage deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;

    nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const;
    nuraft::ptr<nuraft::buffer> deserializeLatestSnapshotBufferFromDisk();
@@ -74,6 +77,7 @@ private:
    const std::string snapshots_path;
    const size_t snapshots_to_keep;
    std::map<size_t, std::string> existing_snapshots;
+    size_t storage_tick_time;
 };

 struct CreateSnapshotTask
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 58a7ca3d5bc..a7037b8d644 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include

 namespace DB
 {
@@ -37,8 +38,7 @@ NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)

 NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
    : coordination_settings(coordination_settings_)
-    , storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
-    , snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep)
+    , snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep, coordination_settings->dead_session_check_period_ms.totalMilliseconds())
    , responses_queue(responses_queue_)
    , snapshots_queue(snapshots_queue_)
    , last_committed_idx(0)
@@ -60,7 +60,7 @@ void NuKeeperStateMachine::init()
        try
        {
            latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
-            latest_snapshot_meta = snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_buf);
+            std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
            last_committed_idx = latest_snapshot_meta->get_last_log_idx();
            loaded = true;
            break;
@@ -83,6 +83,9 @@
    {
        LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx);
    }
+
+    if (!storage)
+        storage = std::make_unique<NuKeeperStorage>(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
 }

 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
@@ -96,7 +99,7 @@
        nuraft::buffer_serializer bs(response);
        {
            std::lock_guard lock(storage_lock);
-            session_id = storage.getSessionID(session_timeout_ms);
+            session_id = storage->getSessionID(session_timeout_ms);
            bs.put_i64(session_id);
        }
        LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
@@ -109,7 +112,7 @@
        NuKeeperStorage::ResponsesForSessions responses_for_sessions;
        {
            std::lock_guard lock(storage_lock);
-            responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id, log_idx);
+            responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx);
            for (auto & response_for_session : responses_for_sessions)
                responses_queue.push(response_for_session);
        }
@@ -133,7 +136,7 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
    {
        std::lock_guard lock(storage_lock);
-        snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_ptr);
+        std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
    }
    last_committed_idx = s.get_last_log_idx();
    return true;
@@ -157,7 +160,7 @@ void NuKeeperStateMachine::create_snapshot(
    CreateSnapshotTask snapshot_task;
    {
        std::lock_guard lock(storage_lock);
-        snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(&storage, snapshot_meta_copy);
+        snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
    }

    snapshot_task.create_snapshot = [this, when_done] (NuKeeperStorageSnapshotPtr && snapshot)
@@ -179,7 +182,7 @@
            {
                /// Must do it with lock (clearing elements from list)
                std::lock_guard lock(storage_lock);
-                storage.clearGarbageAfterSnapshot();
+                storage->clearGarbageAfterSnapshot();
                /// Destroy snapshot with lock
                snapshot.reset();
                LOG_TRACE(log, "Cleared garbage after snapshot");
@@ -214,7 +217,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
    if (obj_id == 0)
    {
        std::lock_guard lock(storage_lock);
-        NuKeeperStorageSnapshot snapshot(&storage, s.get_last_log_idx());
+        NuKeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
        cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
    }
    else
@@ -225,7 +228,28 @@
    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
    cloned_meta = nuraft::snapshot::deserialize(*snp_buf);

-    auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, s.get_last_log_idx());
+    /// Sometimes NuRaft can call save and create snapshots from different threads
+    /// at once. To avoid race conditions we serialize snapshots through snapshots_queue
+    /// TODO: make something better
+    CreateSnapshotTask snapshot_task;
+    std::shared_ptr<std::promise<void>> waiter = std::make_shared<std::promise<void>>();
+    auto future = waiter->get_future();
+    snapshot_task.snapshot = nullptr;
+    snapshot_task.create_snapshot = [this, waiter, cloned_buffer, log_idx = s.get_last_log_idx()] (NuKeeperStorageSnapshotPtr &&)
+    {
+        try
+        {
+            auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, log_idx);
+            LOG_DEBUG(log, "Saved snapshot {} to path {}", log_idx, result_path);
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log);
+        }
+        waiter->set_value();
+    };
+    snapshots_queue.push(std::move(snapshot_task));
+    future.wait();

    {
        std::lock_guard lock(snapshots_lock);
@@ -233,7 +257,6 @@
        latest_snapshot_meta = cloned_meta;
    }

-    LOG_DEBUG(log, "Created snapshot {} with path {}", s.get_last_log_idx(), result_path);
    obj_id++;
 }
@@ -271,7 +294,7 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
    NuKeeperStorage::ResponsesForSessions responses;
    {
        std::lock_guard lock(storage_lock);
-        responses = storage.processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
+        responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
    }
    for (const auto & response : responses)
        responses_queue.push(response);
@@ -280,13 +303,13 @@
 std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
 {
    std::lock_guard lock(storage_lock);
-    return storage.getDeadSessions();
+    return storage->getDeadSessions();
 }

 void NuKeeperStateMachine::shutdownStorage()
 {
    std::lock_guard lock(storage_lock);
-    storage.finalize();
+    storage->finalize();
 }

 }
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 905f3448c1a..af9ad6de4d2 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -52,7 +52,7 @@ public:

    NuKeeperStorage & getStorage()
    {
-        return storage;
+        return *storage;
    }

    void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
@@ -68,7 +68,7 @@ private:

    CoordinationSettingsPtr coordination_settings;

-    NuKeeperStorage storage;
+    NuKeeperStoragePtr storage;

    NuKeeperSnapshotManager snapshot_manager;
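The `save_logical_snp_obj` change funnels the disk write through `snapshots_queue` and blocks on a `std::promise`/`std::future` pair, so that snapshot serialization is effectively single-threaded even when NuRaft calls save and create concurrently. A self-contained sketch of that funneling pattern; the queue, worker, and function names here are illustrative:

```cpp
#include <condition_variable>
#include <cstdio>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>

std::mutex m;
std::condition_variable cv;
std::queue<std::function<void()>> tasks;
bool done = false;

void worker()  // the single thread allowed to touch the disk
{
    while (true)
    {
        std::function<void()> task;
        {
            std::unique_lock<std::mutex> lock(m);
            cv.wait(lock, [] { return done || !tasks.empty(); });
            if (tasks.empty())
                return;
            task = std::move(tasks.front());
            tasks.pop();
        }
        task();
    }
}

void saveSnapshotBlocking(int log_idx)  // may be called from any thread
{
    auto waiter = std::make_shared<std::promise<void>>();
    auto future = waiter->get_future();
    {
        std::lock_guard<std::mutex> lock(m);
        tasks.push([waiter, log_idx]
        {
            std::printf("serializing snapshot %d to disk\n", log_idx);  // stands in for the real write
            waiter->set_value();
        });
    }
    cv.notify_one();
    future.wait();  // block until the worker actually wrote it
}

int main()
{
    std::thread t(worker);
    saveSnapshotBlocking(42);
    {
        std::lock_guard<std::mutex> lock(m);
        done = true;
    }
    cv.notify_one();
    t.join();
}
```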
[prev_node, &container, &ephemerals, path = request.path, child_basename] { if (prev_node.stat.ephemeralOwner != 0) - ephemerals[session_id].emplace(path); + ephemerals[prev_node.stat.ephemeralOwner].emplace(path); container.insert(path, prev_node); container.updateValue(parentPath(path), [&child_basename] (NuKeeperStorage::Node & parent) @@ -377,7 +382,6 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } - }; struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest @@ -641,6 +645,13 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor for (const auto & ephemeral_path : it->second) { container.erase(ephemeral_path); + container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (NuKeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + parent.children.erase(getBaseName(ephemeral_path)); + }); + auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } diff --git a/src/Coordination/NuKeeperStorage.h b/src/Coordination/NuKeeperStorage.h index c49df88159f..058eed55cab 100644 --- a/src/Coordination/NuKeeperStorage.h +++ b/src/Coordination/NuKeeperStorage.h @@ -131,4 +131,6 @@ public: } }; +using NuKeeperStoragePtr = std::unique_ptr; + } diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp index 3aed0d99568..5b35b9c4829 100644 --- a/src/Coordination/NuKeeperStorageDispatcher.cpp +++ b/src/Coordination/NuKeeperStorageDispatcher.cpp @@ -132,6 +132,10 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config); + request_thread = ThreadFromGlobalPool([this] { requestThread(); }); + responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); + snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); + server = std::make_unique(myid, coordination_settings, config, responses_queue, snapshots_queue); try { @@ -148,10 +152,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati throw; } - request_thread = ThreadFromGlobalPool([this] { requestThread(); }); - responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); + session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); }); - snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); LOG_DEBUG(log, "Dispatcher initialized"); } diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp index 01146248f63..cc3dcc04e53 100644 --- a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_for_build.cpp @@ -897,25 +897,25 @@ TEST(CoordinationTest, TestStorageSnapshotSimple) manager.serializeSnapshotBufferToDisk(*buf, 2); EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin")); - DB::NuKeeperStorage restored_storage(500); auto debuf = manager.deserializeSnapshotBufferFromDisk(2); - manager.deserializeSnapshotFromBuffer(&restored_storage, debuf); - EXPECT_EQ(restored_storage.container.size(), 3); - EXPECT_EQ(restored_storage.container.getValue("/").children.size(), 1); - EXPECT_EQ(restored_storage.container.getValue("/hello").children.size(), 1); - 
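The remove path above now erases the node from the set of its ephemeralOwner rather than of the session that issued the remove, and drops the per-session set once it becomes empty. The same bookkeeping in isolation, with plain standard containers standing in for the real NuKeeperStorage types:

#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>

using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>;

/// Erase the path from the set of the session that owns the node,
/// which is not necessarily the session performing the remove.
void removeEphemeral(Ephemerals & ephemerals, int64_t ephemeral_owner, const std::string & path)
{
    auto it = ephemerals.find(ephemeral_owner);
    if (it == ephemerals.end())
        return;

    it->second.erase(path);
    if (it->second.empty())
        ephemerals.erase(it); /// Keep the map free of empty per-session sets.
}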
EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").children.size(), 0); + auto [snapshot_meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf); - EXPECT_EQ(restored_storage.container.getValue("/").data, ""); - EXPECT_EQ(restored_storage.container.getValue("/hello").data, "world"); - EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").data, "somedata"); - EXPECT_EQ(restored_storage.session_id_counter, 7); - EXPECT_EQ(restored_storage.zxid, 2); - EXPECT_EQ(restored_storage.ephemerals.size(), 2); - EXPECT_EQ(restored_storage.ephemerals[3].size(), 1); - EXPECT_EQ(restored_storage.ephemerals[1].size(), 1); - EXPECT_EQ(restored_storage.session_and_timeout.size(), 2); + EXPECT_EQ(restored_storage->container.size(), 3); + EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1); + EXPECT_EQ(restored_storage->container.getValue("/hello").children.size(), 1); + EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").children.size(), 0); + + EXPECT_EQ(restored_storage->container.getValue("/").data, ""); + EXPECT_EQ(restored_storage->container.getValue("/hello").data, "world"); + EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").data, "somedata"); + EXPECT_EQ(restored_storage->session_id_counter, 7); + EXPECT_EQ(restored_storage->zxid, 2); + EXPECT_EQ(restored_storage->ephemerals.size(), 2); + EXPECT_EQ(restored_storage->ephemerals[3].size(), 1); + EXPECT_EQ(restored_storage->ephemerals[1].size(), 1); + EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); } TEST(CoordinationTest, TestStorageSnapshotMoreWrites) @@ -946,15 +946,14 @@ TEST(CoordinationTest, TestStorageSnapshotMoreWrites) manager.serializeSnapshotBufferToDisk(*buf, 50); EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin")); - DB::NuKeeperStorage restored_storage(500); auto debuf = manager.deserializeSnapshotBufferFromDisk(50); - manager.deserializeSnapshotFromBuffer(&restored_storage, debuf); + auto [meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf); - EXPECT_EQ(restored_storage.container.size(), 51); + EXPECT_EQ(restored_storage->container.size(), 51); for (size_t i = 0; i < 50; ++i) { - EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i)); + EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i)); } } @@ -987,14 +986,13 @@ TEST(CoordinationTest, TestStorageSnapshotManySnapshots) EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin")); - DB::NuKeeperStorage restored_storage(500); - manager.restoreFromLatestSnapshot(&restored_storage); + auto [meta, restored_storage] = manager.restoreFromLatestSnapshot(); - EXPECT_EQ(restored_storage.container.size(), 251); + EXPECT_EQ(restored_storage->container.size(), 251); for (size_t i = 0; i < 250; ++i) { - EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i)); + EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i)); } } @@ -1040,12 +1038,11 @@ TEST(CoordinationTest, TestStorageSnapshotMode) EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i))); } - DB::NuKeeperStorage restored_storage(500); - manager.restoreFromLatestSnapshot(&restored_storage); + auto [meta, restored_storage] = manager.restoreFromLatestSnapshot(); for (size_t i = 0; i < 50; ++i) { - EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, 
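These test updates track the new snapshot-manager contract: deserialization allocates and returns both the snapshot metadata and the storage, instead of filling a caller-provided object. A rough sketch of that calling convention, with all type names hypothetical:

#include <cstdint>
#include <memory>
#include <utility>

struct SnapshotMeta { uint64_t up_to_log_idx = 0; };
struct Storage { /* container, ephemerals, session state, ... */ };

using SnapshotMetaPtr = std::shared_ptr<SnapshotMeta>;
using StoragePtr = std::unique_ptr<Storage>;

/// Deserialization owns allocation: the caller never sees a half-filled storage.
std::pair<SnapshotMetaPtr, StoragePtr> restoreFromLatestSnapshot()
{
    auto meta = std::make_shared<SnapshotMeta>();
    auto storage = std::make_unique<Storage>();
    /// ... fill both from the snapshot buffer ...
    return {std::move(meta), std::move(storage)};
}

/// At the call site, structured bindings unpack the pair, as in the tests:
///     auto [meta, restored_storage] = restoreFromLatestSnapshot();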
"world_" + std::to_string(i)); + EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i)); } } @@ -1071,8 +1068,7 @@ TEST(CoordinationTest, TestStorageSnapshotBroken) plain_buf.truncate(34); plain_buf.sync(); - DB::NuKeeperStorage restored_storage(500); - EXPECT_THROW(manager.restoreFromLatestSnapshot(&restored_storage), DB::Exception); + EXPECT_THROW(manager.restoreFromLatestSnapshot(), DB::Exception); } nuraft::ptr getBufferFromZKRequest(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request) @@ -1236,6 +1232,37 @@ TEST(CoordinationTest, TestStateMachineAndLogStore) } } +TEST(CoordinationTest, TestEphemeralNodeRemove) +{ + using namespace Coordination; + using namespace DB; + + ChangelogDirTest snapshots("./snapshots"); + CoordinationSettingsPtr settings = std::make_shared(); + + ResponsesQueue queue; + SnapshotsQueue snapshots_queue{1}; + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings); + state_machine->init(); + + std::shared_ptr request_c = std::make_shared(); + request_c->path = "/hello"; + request_c->is_ephemeral = true; + auto entry_c = getLogEntryFromZKRequest(0, 1, request_c); + state_machine->commit(1, entry_c->get_buf()); + const auto & storage = state_machine->getStorage(); + + EXPECT_EQ(storage.ephemerals.size(), 1); + std::shared_ptr request_d = std::make_shared(); + request_d->path = "/hello"; + /// Delete from other session + auto entry_d = getLogEntryFromZKRequest(0, 2, request_d); + state_machine->commit(2, entry_d->get_buf()); + + EXPECT_EQ(storage.ephemerals.size(), 0); +} + + int main(int argc, char ** argv) { Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); diff --git a/src/Core/Field.h b/src/Core/Field.h index 558e1fafd74..81d06693a7f 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -953,3 +953,26 @@ void writeFieldText(const Field & x, WriteBuffer & buf); String toString(const Field & x); } + +template <> +struct fmt::formatter +{ + constexpr auto parse(format_parse_context & ctx) + { + auto it = ctx.begin(); + auto end = ctx.end(); + + /// Only support {}. 
+ if (it != end && *it != '}') + throw format_error("invalid format"); + + return it; + } + + template <typename FormatContext> + auto format(const DB::Field & x, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", toString(x)); + } +}; + diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index e96ce1824d2..7b1779d4346 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index fe777355ca1..eedf4dd3d87 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -101,7 +101,7 @@ template <DictionaryKeyType dictionary_key_type> double CacheDictionary<dictionary_key_type>::getLoadFactor() const { const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - return static_cast<double>(cache_storage_ptr->getSize()) / cache_storage_ptr->getMaxSize(); + return cache_storage_ptr->getLoadFactor(); } template <DictionaryKeyType dictionary_key_type> @@ -333,9 +333,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumnsImpl( FetchResult result_of_fetch_from_storage; { - /// Write lock on storage - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; - + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request); } diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index cf0b74e8bd2..f0028dd8848 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -30,28 +31,31 @@ struct CacheDictionaryStorageConfiguration const DictionaryLifetime lifetime; }; -/** Keys are stored in LRUCache and column values are serialized into arena. - - Cell in LRUCache consists of allocated size and place in arena where columns' serialized data is stored. - - Columns are serialized by rows. - - When cell is removed from LRUCache data associated with it is also removed from arena. - - In case of complex key we also store key data in arena and it is removed from arena. -*/ +/** ICacheDictionaryStorage implementation that keeps keys in a hash table with fixed collision length. + * Values in the hash table point to indexes in the attribute arrays.
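The formatter specialization above is the whole story for making a type printable through fmt; the same two-method shape works for any type. A sketch for a made-up Point type, kept to the bare {} spec exactly like the DB::Field version (fmt API as of the style used here, which may differ in newer fmt releases):

#include <fmt/format.h>

struct Point { int x = 0; int y = 0; };

template <>
struct fmt::formatter<Point>
{
    constexpr auto parse(format_parse_context & ctx)
    {
        auto it = ctx.begin();
        if (it != ctx.end() && *it != '}')
            throw format_error("invalid format"); /// Bare {} only, as above.
        return it;
    }

    template <typename FormatContext>
    auto format(const Point & p, FormatContext & ctx)
    {
        return format_to(ctx.out(), "({}, {})", p.x, p.y);
    }
};

/// Usage: fmt::format("point: {}", Point{1, 2}) yields "point: (1, 2)".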
+ */ template class CacheDictionaryStorage final : public ICacheDictionaryStorage { + + static constexpr size_t max_collision_length = 10; + public: using KeyType = std::conditional_t; static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage"); - explicit CacheDictionaryStorage(CacheDictionaryStorageConfiguration & configuration_) + explicit CacheDictionaryStorage( + const DictionaryStructure & dictionary_structure, + CacheDictionaryStorageConfiguration & configuration_) : configuration(configuration_) , rnd_engine(randomSeed()) - , cache(configuration.max_size_in_cells, false, { arena }) { + size_t cells_size = roundUpToPowerOfTwoOrZero(std::max(configuration.max_size_in_cells, max_collision_length)); + + cells.resize_fill(cells_size); + size_overlap_mask = cells_size - 1; + + setup(dictionary_structure); } bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; } @@ -71,9 +75,7 @@ public: const DictionaryStorageFetchRequest & fetch_request) override { if constexpr (dictionary_key_type == DictionaryKeyType::simple) - { return fetchColumnsForKeysImpl(keys, fetch_request); - } else throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED); } @@ -109,9 +111,7 @@ public: const DictionaryStorageFetchRequest & column_fetch_requests) override { if constexpr (dictionary_key_type == DictionaryKeyType::complex) - { return fetchColumnsForKeysImpl(keys, column_fetch_requests); - } else throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED); } @@ -140,79 +140,162 @@ public: throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED); } - size_t getSize() const override { return cache.size(); } + size_t getSize() const override { return size; } - size_t getMaxSize() const override { return cache.getMaxSize(); } + double getLoadFactor() const override { return static_cast(size) / configuration.max_size_in_cells; } - size_t getBytesAllocated() const override { return arena.size() + cache.getSizeInBytes(); } + size_t getBytesAllocated() const override + { + size_t attributes_size_in_bytes = 0; + size_t attributes_size = attributes.size(); + + for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) + { + getAttributeContainer(attribute_index, [&](const auto & container) + { + attributes_size_in_bytes += container.capacity() * sizeof(container[0]); + }); + } + + return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes; + } private: + struct FetchedKey + { + FetchedKey(size_t element_index_, bool is_default_) + : element_index(element_index_) + , is_default(is_default_) + {} + + size_t element_index; + bool is_default; + }; + template - ALWAYS_INLINE KeysStorageFetchResult fetchColumnsForKeysImpl( + KeysStorageFetchResult fetchColumnsForKeysImpl( const PaddedPODArray & keys, const DictionaryStorageFetchRequest & fetch_request) { KeysStorageFetchResult result; result.fetched_columns = fetch_request.makeAttributesResultColumns(); - result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found}); + result.key_index_to_state.resize_fill(keys.size()); - const auto now = std::chrono::system_clock::now(); + const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); size_t fetched_columns_index = 0; + size_t keys_size = keys.size(); 
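The members introduced above (cells, size_overlap_mask, max_collision_length) define an open-addressing table with a bounded probe window: hash the key, mask into a power-of-two array, scan at most max_collision_length slots. A stripped-down sketch of that lookup, with a simplified Cell and std::hash in place of the real containers:

#include <cstddef>
#include <cstdint>
#include <ctime>
#include <functional>
#include <vector>

struct Cell { uint64_t key = 0; time_t deadline = 0; };

static constexpr size_t max_collision_length = 10;

/// cells.size() must be a power of two so that `hash & mask` replaces modulo.
const Cell * find(const std::vector<Cell> & cells, size_t size_overlap_mask, uint64_t key)
{
    size_t start = std::hash<uint64_t>{}(key) & size_overlap_mask;

    for (size_t i = 0; i < max_collision_length; ++i)
    {
        const Cell & cell = cells[(start + i) & size_overlap_mask];
        if (cell.key == key)
            return &cell;
    }

    return nullptr; /// At most max_collision_length slots are ever touched.
}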
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds); - size_t keys_size = keys.size(); + PaddedPODArray fetched_keys; + fetched_keys.resize_fill(keys_size); for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = keys[key_index]; - auto * it = cache.find(key); + auto [key_state, cell_index] = getKeyStateAndCellIndex(key, now); - if (it) + if (unlikely(key_state == KeyState::not_found)) { - /// Columns values for key are serialized in cache now deserialize them - const auto & cell = it->getMapped(); + result.key_index_to_state[key_index] = {KeyState::not_found}; + ++result.not_found_keys_size; + continue; + } - bool has_deadline = cellHasDeadline(cell); + auto & cell = cells[cell_index]; - if (has_deadline && now > cell.deadline + max_lifetime_seconds) - { - result.key_index_to_state[key_index] = {KeyState::not_found}; - ++result.not_found_keys_size; - continue; - } - else if (has_deadline && now > cell.deadline) - { - result.key_index_to_state[key_index] = {KeyState::expired, fetched_columns_index}; - ++result.expired_keys_size; - } - else - { - result.key_index_to_state[key_index] = {KeyState::found, fetched_columns_index}; - ++result.found_keys_size; - } + result.expired_keys_size += static_cast(key_state == KeyState::expired); - ++fetched_columns_index; + result.key_index_to_state[key_index] = {key_state, fetched_columns_index}; + fetched_keys[fetched_columns_index] = FetchedKey(cell.element_index, cell.is_default); - if (cell.isDefault()) + ++fetched_columns_index; + + result.key_index_to_state[key_index].setDefaultValue(cell.is_default); + result.default_keys_size += cell.is_default; + } + + result.found_keys_size = keys_size - (result.expired_keys_size + result.not_found_keys_size); + + for (size_t attribute_index = 0; attribute_index < fetch_request.attributesSize(); ++attribute_index) + { + if (!fetch_request.shouldFillResultColumnWithIndex(attribute_index)) + continue; + + auto & attribute = attributes[attribute_index]; + const auto & default_value_provider = fetch_request.defaultValueProviderAtIndex(attribute_index); + + size_t fetched_keys_size = fetched_keys.size(); + auto & fetched_column = *result.fetched_columns[attribute_index]; + fetched_column.reserve(fetched_keys_size); + + if (unlikely(attribute.is_complex_type)) + { + auto & container = std::get>(attribute.attribute_container); + + for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index) { - result.key_index_to_state[key_index].setDefault(); - ++result.default_keys_size; - insertDefaultValuesIntoColumns(result.fetched_columns, fetch_request, key_index); - } - else - { - const char * place_for_serialized_columns = cell.place_for_serialized_columns; - deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, place_for_serialized_columns); + auto fetched_key = fetched_keys[fetched_key_index]; + + if (unlikely(fetched_key.is_default)) + fetched_column.insert(default_value_provider.getDefaultValue(fetched_key_index)); + else + fetched_column.insert(container[fetched_key.element_index]); } } else { - result.key_index_to_state[key_index] = {KeyState::not_found}; - ++result.not_found_keys_size; + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnType = + std::conditional_t, ColumnString, + std::conditional_t, ColumnDecimal, + ColumnVector>>; + + auto & container = 
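Structurally, the rewritten fetch is two passes: the loop above only resolves each key to an (element_index, is_default) pair, and the per-attribute pass then appends into each result column, so every typed container is walked contiguously. A schematic of the second pass, reduced to plain vectors:

#include <cstddef>
#include <vector>

struct FetchedKey { size_t element_index; bool is_default; };

/// Second pass: one contiguous walk per attribute, appending into the result column.
template <typename T>
void fillColumn(
    const std::vector<FetchedKey> & fetched_keys,
    const std::vector<T> & container,      /// Typed storage for this attribute.
    const T & default_value,               /// From the request's default-value provider.
    std::vector<T> & result_column)
{
    result_column.reserve(fetched_keys.size());

    for (const auto & fetched_key : fetched_keys)
    {
        if (fetched_key.is_default)
            result_column.push_back(default_value);
        else
            result_column.push_back(container[fetched_key.element_index]);
    }
}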
std::get>(attribute.attribute_container); + ColumnType & column_typed = static_cast(fetched_column); + + if constexpr (std::is_same_v) + { + for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index) + { + auto fetched_key = fetched_keys[fetched_key_index]; + + if (unlikely(fetched_key.is_default)) + column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index)); + else + { + auto item = container[fetched_key.element_index]; + column_typed.insertData(item.data, item.size); + } + } + } + else + { + auto & data = column_typed.getData(); + + for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index) + { + auto fetched_key = fetched_keys[fetched_key_index]; + + if (unlikely(fetched_key.is_default)) + column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index)); + else + { + auto item = container[fetched_key.element_index]; + data.push_back(item); + } + } + } + }; + + callOnDictionaryAttributeType(attribute.type, type_call); } } @@ -221,58 +304,108 @@ private: void insertColumnsForKeysImpl(const PaddedPODArray & keys, Columns columns) { - Arena temporary_values_pool; - - size_t columns_to_serialize_size = columns.size(); - PaddedPODArray temporary_column_data(columns_to_serialize_size); - const auto now = std::chrono::system_clock::now(); - size_t keys_size = keys.size(); + Field column_value; - for (size_t key_index = 0; key_index < keys_size; ++key_index) + for (size_t key_index = 0; key_index < keys.size(); ++key_index) { - size_t allocated_size_for_columns = 0; - const char * block_start = nullptr; - auto key = keys[key_index]; - auto * it = cache.find(key); - for (size_t column_index = 0; column_index < columns_to_serialize_size; ++column_index) + size_t cell_index = getCellIndexForInsert(key); + auto & cell = cells[cell_index]; + + bool cell_was_default = cell.is_default; + cell.is_default = false; + + bool was_inserted = cell.deadline == 0; + + if (was_inserted) { - auto & column = columns[column_index]; - temporary_column_data[column_index] = column->serializeValueIntoArena(key_index, temporary_values_pool, block_start); - allocated_size_for_columns += temporary_column_data[column_index].size; - } + if constexpr (std::is_same_v) + cell.key = copyStringInArena(key); + else + cell.key = key; - char * place_for_serialized_columns = arena.alloc(allocated_size_for_columns); - memcpy(reinterpret_cast(place_for_serialized_columns), reinterpret_cast(block_start), allocated_size_for_columns); + for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index) + { + auto & column = columns[attribute_index]; - if (it) - { - /// Cell exists need to free previous serialized place and update deadline - auto & cell = it->getMapped(); + getAttributeContainer(attribute_index, [&](auto & container) + { + container.emplace_back(); + cell.element_index = container.size() - 1; - if (cell.place_for_serialized_columns) - arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns); + using ElementType = std::decay_t; - setCellDeadline(cell, now); - cell.allocated_size_for_columns = allocated_size_for_columns; - cell.place_for_serialized_columns = place_for_serialized_columns; + column->get(key_index, column_value); + + if constexpr (std::is_same_v) + container.back() = column_value; + else if constexpr (std::is_same_v) + { + const String & string_value = column_value.get(); + StringRef string_value_ref = StringRef {string_value.data(), string_value.size()}; + 
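Note the ownership rule in the string branch here: the new value is copied into the arena first, and the previous allocation is freed only when the cell was not a default, since default cells own no arena memory. The same exchange in isolation, with a trivial new[]/delete[] arena standing in for ArenaWithFreeLists:

#include <cstddef>
#include <cstring>
#include <string_view>

/// Trivial stand-in for ArenaWithFreeLists, just to make the sketch runnable.
struct Arena
{
    char * alloc(size_t size) { return new char[size]; }
    void free(char * data, size_t /*size*/) { delete[] data; }
};

std::string_view replaceValue(
    Arena & arena, std::string_view previous, bool was_default, std::string_view new_value)
{
    /// Copy the new value in first ...
    char * place = arena.alloc(new_value.size());
    std::memcpy(place, new_value.data(), new_value.size());

    /// ... then release the old one, but only if the cell really owned memory.
    if (!was_default)
        arena.free(const_cast<char *>(previous.data()), previous.size());

    return {place, new_value.size()};
}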
StringRef inserted_value = copyStringInArena(string_value_ref); + container.back() = inserted_value; + } + else + container.back() = column_value.get>(); + }); + } + + ++size; } else { - /// No cell exists so create and put in cache - Cell cell; + if (cell.key != key) + { + if constexpr (std::is_same_v) + { + char * data = const_cast(cell.key.data); + arena.free(data, cell.key.size); + cell.key = copyStringInArena(key); + } + else + cell.key = key; + } - setCellDeadline(cell, now); - cell.allocated_size_for_columns = allocated_size_for_columns; - cell.place_for_serialized_columns = place_for_serialized_columns; + /// Put values into existing index + size_t index_to_use = cell.element_index; - insertCellInCache(key, cell); + for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index) + { + auto & column = columns[attribute_index]; + + getAttributeContainer(attribute_index, [&](auto & container) + { + using ElementType = std::decay_t; + + column->get(key_index, column_value); + + if constexpr (std::is_same_v) + container[index_to_use] = column_value; + else if constexpr (std::is_same_v) + { + const String & string_value = column_value.get(); + StringRef string_ref_value = StringRef {string_value.data(), string_value.size()}; + StringRef inserted_value = copyStringInArena(string_ref_value); + + if (!cell_was_default) + { + StringRef previous_value = container[index_to_use]; + arena.free(const_cast(previous_value.data), previous_value.size); + } + + container[index_to_use] = inserted_value; + } + else + container[index_to_use] = column_value.get>(); + }); + } } - temporary_values_pool.rollback(allocated_size_for_columns); + setCellDeadline(cell, now); } } @@ -280,94 +413,224 @@ private: { const auto now = std::chrono::system_clock::now(); - for (auto key : keys) + size_t keys_size = keys.size(); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) { - auto * it = cache.find(key); + auto key = keys[key_index]; - if (it) + size_t cell_index = getCellIndexForInsert(key); + auto & cell = cells[cell_index]; + + bool was_inserted = cell.deadline == 0; + bool cell_was_default = cell.is_default; + + cell.is_default = true; + + if (was_inserted) { - auto & cell = it->getMapped(); + if constexpr (std::is_same_v) + cell.key = copyStringInArena(key); + else + cell.key = key; - setCellDeadline(cell, now); + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + getAttributeContainer(attribute_index, [&](auto & container) + { + container.emplace_back(); + cell.element_index = container.size() - 1; + }); + } - if (cell.place_for_serialized_columns) - arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns); - - cell.allocated_size_for_columns = 0; - cell.place_for_serialized_columns = nullptr; + ++size; } else { - Cell cell; + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + getAttributeContainer(attribute_index, [&](const auto & container) + { + using ElementType = std::decay_t; - setCellDeadline(cell, now); - cell.allocated_size_for_columns = 0; - cell.place_for_serialized_columns = nullptr; + if constexpr (std::is_same_v) + { + if (!cell_was_default) + { + StringRef previous_value = container[cell.element_index]; + arena.free(const_cast(previous_value.data), previous_value.size); + } + } + }); + } - insertCellInCache(key, cell); + if (cell.key != key) + { + if constexpr (std::is_same_v) + { + char * data = const_cast(cell.key.data); + arena.free(data, 
cell.key.size); + cell.key = copyStringInArena(key); + } + else + cell.key = key; + } } + + setCellDeadline(cell, now); } } PaddedPODArray getCachedKeysImpl() const { PaddedPODArray result; - result.reserve(cache.size()); + result.reserve(size); - for (auto & node : cache) + for (auto & cell : cells) { - auto & cell = node.getMapped(); - - if (cell.isDefault()) + if (cell.deadline == 0) continue; - result.emplace_back(node.getKey()); + if (cell.is_default) + continue; + + result.emplace_back(cell.key); } return result; } + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) + { + auto & attribute = attributes[attribute_index]; + auto & attribute_type = attribute.type; + + if (unlikely(attribute.is_complex_type)) + { + auto & container = std::get>(attribute.attribute_container); + std::forward(func)(container); + } + else + { + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + auto & container = std::get>(attribute.attribute_container); + std::forward(func)(container); + }; + + callOnDictionaryAttributeType(attribute_type, type_call); + } + } + + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) const + { + return const_cast *>(this)->template getAttributeContainer(attribute_index, std::forward(func)); + } + + StringRef copyStringInArena(StringRef value_to_copy) + { + size_t value_to_copy_size = value_to_copy.size; + char * place_for_key = arena.alloc(value_to_copy_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(value_to_copy.data), value_to_copy_size); + StringRef updated_value{place_for_key, value_to_copy_size}; + + return updated_value; + } + + void setup(const DictionaryStructure & dictionary_structure) + { + /// For each dictionary attribute create storage attribute + /// For simple attributes create PODArray, for complex vector of Fields + + attributes.reserve(dictionary_structure.attributes.size()); + + for (const auto & dictionary_attribute : dictionary_structure.attributes) + { + auto attribute_type = dictionary_attribute.underlying_type; + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + attributes.emplace_back(); + auto & last_attribute = attributes.back(); + last_attribute.type = attribute_type; + last_attribute.is_complex_type = dictionary_attribute.is_nullable || dictionary_attribute.is_array; + + if (dictionary_attribute.is_nullable) + last_attribute.attribute_container = std::vector(); + else + last_attribute.attribute_container = PaddedPODArray(); + }; + + callOnDictionaryAttributeType(attribute_type, type_call); + } + } + using TimePoint = std::chrono::system_clock::time_point; struct Cell { - TimePoint deadline; - size_t allocated_size_for_columns; - char * place_for_serialized_columns; - - inline bool isDefault() const { return place_for_serialized_columns == nullptr; } - inline void setDefault() - { - place_for_serialized_columns = nullptr; - allocated_size_for_columns = 0; - } + KeyType key; + size_t element_index; + bool is_default; + time_t deadline; }; - void insertCellInCache(KeyType & key, const Cell & cell) + struct Attribute { - if constexpr (dictionary_key_type == DictionaryKeyType::complex) - { - /// Copy complex key into arena and put in cache - size_t key_size = key.size; - char * 
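getAttributeContainer above is a thin dispatcher over the attribute_container variant; conceptually it is std::visit specialized by the dictionary attribute type. A generic sketch of the idea, with the variant reduced to three alternatives:

#include <cstdint>
#include <string>
#include <variant>
#include <vector>

/// One properly typed, contiguous container per attribute.
using AttributeContainer = std::variant<
    std::vector<uint64_t>,
    std::vector<double>,
    std::vector<std::string>>;

/// std::visit plays the role that callOnDictionaryAttributeType plays above.
template <typename Func>
void withContainer(AttributeContainer & container, Func && func)
{
    std::visit([&](auto & typed_container) { func(typed_container); }, container);
}

/// Usage: append a default-constructed element, whatever the underlying type.
void emplaceBack(AttributeContainer & container)
{
    withContainer(container, [](auto & typed) { typed.emplace_back(); });
}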
place_for_key = arena.alloc(key_size); - memcpy(reinterpret_cast(place_for_key), reinterpret_cast(key.data), key_size); - KeyType updated_key{place_for_key, key_size}; - key = updated_key; - } + AttributeUnderlyingType type; + bool is_complex_type; - cache.insert(key, cell); - } + std::variant< + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + PaddedPODArray, + std::vector> attribute_container; + }; - inline static bool cellHasDeadline(const Cell & cell) - { - return cell.deadline != std::chrono::system_clock::from_time_t(0); - } + CacheDictionaryStorageConfiguration configuration; + + pcg64 rnd_engine; + + size_t size_overlap_mask = 0; + + size_t size = 0; + + PaddedPODArray cells; + + ArenaWithFreeLists arena; + + std::vector attributes; inline void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { - cell.deadline = std::chrono::system_clock::from_time_t(0); + /// This maybe not obvious, but when we define is this cell is expired or expired permanently, we add strict_max_lifetime_seconds + /// to the expiration time. And it overflows pretty well. + auto deadline = std::chrono::time_point::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds); + cell.deadline = std::chrono::system_clock::to_time_t(deadline); return; } @@ -375,44 +638,75 @@ private: size_t max_sec_lifetime = configuration.lifetime.max_sec; std::uniform_int_distribution distribution{min_sec_lifetime, max_sec_lifetime}; - cell.deadline = now + std::chrono::seconds(distribution(rnd_engine)); + + auto deadline = now + std::chrono::seconds(distribution(rnd_engine)); + cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - template - friend class ArenaCellDisposer; - - CacheDictionaryStorageConfiguration configuration; - - ArenaWithFreeLists arena; - - pcg64 rnd_engine; - - class ArenaCellDisposer + inline size_t getCellIndex(const KeyType key) const { - public: - ArenaWithFreeLists & arena; + const size_t hash = DefaultHash()(key); + const size_t index = hash & size_overlap_mask; + return index; + } - template - void operator()(const Key & key, const Value & value) const + using KeyStateAndCellIndex = std::pair; + + inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const + { + size_t place_value = getCellIndex(key); + const size_t place_value_end = place_value + max_collision_length; + + time_t max_lifetime_seconds = static_cast(configuration.strict_max_lifetime_seconds); + + for (; place_value < place_value_end; ++place_value) { - /// In case of complex key we keep it in arena - if constexpr (std::is_same_v) - arena.free(const_cast(key.data), key.size); + const auto cell_place_value = place_value & size_overlap_mask; + const auto & cell = cells[cell_place_value]; - if (value.place_for_serialized_columns) - arena.free(value.place_for_serialized_columns, value.allocated_size_for_columns); + if (cell.key != key) + continue; + + if (unlikely(now > cell.deadline + max_lifetime_seconds)) + return std::make_pair(KeyState::not_found, cell_place_value); + + if (unlikely(now > cell.deadline)) + return std::make_pair(KeyState::expired, cell_place_value); + + return std::make_pair(KeyState::found, cell_place_value); } - }; - using SimpleKeyLRUHashMap = LRUHashMap; - using 
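The deadline comment above is worth unpacking: the expiry checks add strict_max_lifetime_seconds to the stored deadline, so a naive "never expires" sentinel at the maximum time point would overflow; the code therefore backs the sentinel off by twice that margin. The computation in isolation:

#include <chrono>
#include <cstddef>
#include <ctime>

/// Sentinel for "no lifetime configured": far enough below time_point::max()
/// that `deadline + strict_max_lifetime_seconds` cannot overflow in the checks.
time_t neverExpiresDeadline(size_t strict_max_lifetime_seconds)
{
    using Clock = std::chrono::system_clock;

    auto deadline = Clock::time_point::max()
        - 2 * std::chrono::seconds(strict_max_lifetime_seconds);

    return Clock::to_time_t(deadline);
}

/// The expiry checks then need no special case:
///     now > deadline + strict_max  ->  treat as not found
///     now > deadline               ->  expired, may still be served if allowed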
ComplexKeyLRUHashMap = LRUHashMapWithSavedHash; + return std::make_pair(KeyState::not_found, place_value & size_overlap_mask); + } - using CacheLRUHashMap = std::conditional_t< - dictionary_key_type == DictionaryKeyType::simple, - SimpleKeyLRUHashMap, - ComplexKeyLRUHashMap>; + inline size_t getCellIndexForInsert(const KeyType & key) const + { + size_t place_value = getCellIndex(key); + const size_t place_value_end = place_value + max_collision_length; + size_t oldest_place_value = place_value; - CacheLRUHashMap cache; + time_t oldest_time = std::numeric_limits::max(); + + for (; place_value < place_value_end; ++place_value) + { + const size_t cell_place_value = place_value & size_overlap_mask; + const Cell cell = cells[cell_place_value]; + + if (cell.deadline == 0) + return cell_place_value; + + if (cell.key == key) + return cell_place_value; + + if (cell.deadline < oldest_time) + { + oldest_time = cell.deadline; + oldest_place_value = cell_place_value; + } + } + + return oldest_place_value; + } }; } diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index 8db2dab536c..72b3ef76f11 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -12,9 +12,9 @@ struct KeyState { enum State: uint8_t { - not_found = 2, - expired = 4, - found = 8, + not_found = 0, + expired = 1, + found = 2, }; KeyState(State state_, size_t fetched_column_index_) @@ -31,9 +31,10 @@ struct KeyState inline bool isNotFound() const { return state == State::not_found; } inline bool isDefault() const { return is_default; } inline void setDefault() { is_default = true; } + inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; } /// Valid only if keyState is found or expired inline size_t getFetchedColumnIndex() const { return fetched_column_index; } - + inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } private: State state = not_found; size_t fetched_column_index = 0; @@ -111,8 +112,8 @@ public: /// Return size of keys in storage virtual size_t getSize() const = 0; - /// Return maximum size of keys in storage - virtual size_t getMaxSize() const = 0; + /// Returns storage load factor + virtual double getLoadFactor() const = 0; /// Return bytes allocated in storage virtual size_t getBytesAllocated() const = 0; diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index 16a8954de58..67f0465a2c7 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,7 +56,6 @@ struct SSDCacheDictionaryStorageConfiguration const std::string file_path; const size_t max_partitions_count; - const size_t max_stored_keys; const size_t block_size; const size_t file_blocks_size; const size_t read_buffer_blocks_size; @@ -127,7 +126,7 @@ public: /// Reset block with new block_data /// block_data must be filled with zeroes if it is new block - ALWAYS_INLINE inline void reset(char * new_block_data) + inline void reset(char * new_block_data) { block_data = new_block_data; current_block_offset = block_header_size; @@ -135,13 +134,13 @@ public: } /// Check if it is enough place to write key in block - ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const + inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & 
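getCellIndexForInsert, shown above, completes the probing scheme: within the same bounded window it reuses an empty slot or the key's own slot, and otherwise evicts the cell with the oldest deadline. The slot-selection policy on its own, reusing the simplified Cell from the earlier sketch:

#include <cstddef>
#include <cstdint>
#include <ctime>
#include <limits>
#include <vector>

struct Cell { uint64_t key = 0; time_t deadline = 0; };

static constexpr size_t max_collision_length = 10;

size_t cellIndexForInsert(const std::vector<Cell> & cells, size_t size_overlap_mask, uint64_t key, size_t start)
{
    size_t oldest_place = start & size_overlap_mask;
    time_t oldest_time = std::numeric_limits<time_t>::max();

    for (size_t i = 0; i < max_collision_length; ++i)
    {
        size_t place = (start + i) & size_overlap_mask;
        const Cell & cell = cells[place];

        if (cell.deadline == 0)  /// Never occupied: take it immediately.
            return place;

        if (cell.key == key)     /// Same key: update in place.
            return place;

        if (cell.deadline < oldest_time)  /// Remember the stalest candidate.
        {
            oldest_time = cell.deadline;
            oldest_place = place;
        }
    }

    return oldest_place; /// Window exhausted: evict the oldest cell.
}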
cache_key) const { return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size; } /// Check if it is enough place to write key in block - ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const + inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const { const StringRef & key = cache_key.key; size_t complex_key_size = sizeof(key.size) + key.size; @@ -152,7 +151,7 @@ public: /// Write key and returns offset in ssd cache block where data is written /// It is client responsibility to check if there is enough place in block to write key /// Returns true if key was written and false if there was not enough place to write key - ALWAYS_INLINE inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) + inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -181,7 +180,7 @@ public: return true; } - ALWAYS_INLINE inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) + inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -216,20 +215,20 @@ public: return true; } - ALWAYS_INLINE inline size_t getKeysSize() const { return keys_size; } + inline size_t getKeysSize() const { return keys_size; } /// Write keys size into block header - ALWAYS_INLINE inline void writeKeysSize() + inline void writeKeysSize() { char * keys_size_offset_data = block_data + block_header_check_sum_size; std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t)); } /// Get check sum from block header - ALWAYS_INLINE inline size_t getCheckSum() const { return unalignedLoad(block_data); } + inline size_t getCheckSum() const { return unalignedLoad(block_data); } /// Calculate check sum in block - ALWAYS_INLINE inline size_t calculateCheckSum() const + inline size_t calculateCheckSum() const { size_t calculated_check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); @@ -237,7 +236,7 @@ public: } /// Check if check sum from block header matched calculated check sum in block - ALWAYS_INLINE inline bool checkCheckSum() const + inline bool checkCheckSum() const { size_t calculated_check_sum = calculateCheckSum(); size_t check_sum = getCheckSum(); @@ -246,16 +245,16 @@ public: } /// Write check sum in block header - ALWAYS_INLINE inline void writeCheckSum() + inline void writeCheckSum() { size_t check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); std::memcpy(block_data, &check_sum, sizeof(size_t)); } - ALWAYS_INLINE inline size_t getBlockSize() const { return block_size; } + inline size_t getBlockSize() const { return block_size; } /// Returns block data - ALWAYS_INLINE inline char * getBlockData() const { return block_data; } + inline char * getBlockData() const { return block_data; } /// Read keys that were serialized in block /// It is client responsibility to ensure that simple or complex keys were written in block @@ -337,9 +336,7 @@ inline bool operator==(const SSDCacheIndex & lhs, const SSDCacheIndex & rhs) return lhs.block_index == rhs.block_index && lhs.offset_in_block == rhs.offset_in_block; } -/** SSDCacheMemoryBuffer initialized with block size and memory buffer blocks size. - * Allocate block_size * memory_buffer_blocks_size bytes with page alignment. 
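The block helpers above keep a small header at the start of every block, check sum first and then the keys count, with the check sum covering everything behind its own slot. A sketch of that header discipline, with std::hash standing in for CityHash and no real key serialization:

#include <cstddef>
#include <cstring>
#include <functional>
#include <string_view>

static constexpr size_t check_sum_size = sizeof(size_t);
static constexpr size_t keys_size_size = sizeof(size_t);
static constexpr size_t header_size = check_sum_size + keys_size_size;

/// Serialized keys start right after the two header fields.
const char * keysBegin(const char * block) { return block + header_size; }

/// The check sum covers everything after its own slot, keys count included.
size_t calculateCheckSum(const char * block, size_t block_size)
{
    return std::hash<std::string_view>{}(
        std::string_view(block + check_sum_size, block_size - check_sum_size));
}

void writeCheckSum(char * block, size_t block_size)
{
    size_t check_sum = calculateCheckSum(block, block_size);
    std::memcpy(block, &check_sum, sizeof(check_sum));
}

bool checkCheckSum(const char * block, size_t block_size)
{
    size_t stored = 0;
    std::memcpy(&stored, block, sizeof(stored));
    return stored == calculateCheckSum(block, block_size);
}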
- * Logically represents multiple memory_buffer_blocks_size blocks and current write block. +/** Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks and current write block. * If key cannot be written into current_write_block, current block keys size and check summ is written * and buffer increase index of current_write_block_index. * If current_write_block_index == memory_buffer_blocks_size write key will always returns true. @@ -444,7 +441,7 @@ private: size_t current_block_index = 0; }; -/// TODO: Add documentation +/// Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks on file system template class SSDCacheFileBuffer : private boost::noncopyable { @@ -614,11 +611,13 @@ public: } template - ALWAYS_INLINE void fetchBlocks(char * read_buffer, size_t read_from_file_buffer_blocks_size, const PaddedPODArray & blocks_to_fetch, FetchBlockFunc && func) const + void fetchBlocks(size_t read_from_file_buffer_blocks_size, const PaddedPODArray & blocks_to_fetch, FetchBlockFunc && func) const { if (blocks_to_fetch.empty()) return; + Memory> read_buffer(read_from_file_buffer_blocks_size * block_size, 4096); + size_t blocks_to_fetch_size = blocks_to_fetch.size(); PaddedPODArray requests; @@ -631,7 +630,7 @@ public: { iocb request{}; - char * buffer_place = read_buffer + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size); + char * buffer_place = read_buffer.data() + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size); #if defined(__FreeBSD__) request.aio.aio_lio_opcode = LIO_READ; @@ -751,7 +750,7 @@ private: int fd = -1; }; - ALWAYS_INLINE inline static int preallocateDiskSpace(int fd, size_t offset, size_t len) + inline static int preallocateDiskSpace(int fd, size_t offset, size_t len) { #if defined(__FreeBSD__) return posix_fallocate(fd, offset, len); @@ -760,7 +759,7 @@ private: #endif } - ALWAYS_INLINE inline static char * getRequestBuffer(const iocb & request) + inline static char * getRequestBuffer(const iocb & request) { char * result = nullptr; @@ -773,7 +772,7 @@ private: return result; } - ALWAYS_INLINE inline static ssize_t eventResult(io_event & event) + inline static ssize_t eventResult(io_event & event) { ssize_t bytes_written; @@ -795,7 +794,13 @@ private: size_t current_blocks_size = 0; }; -/// TODO: Add documentation +/** ICacheDictionaryStorage implementation that keeps column data serialized in memory index and in disk partitions. + * Data is first written in memory buffer. + * If memory buffer is full then buffer is flushed to disk partition. + * If memory buffer cannot be flushed to associated disk partition, then if partition + * can be allocated (current partition index < max_partitions_size) storage allocates new partition, if not old partitions are reused. + * Index maps key to partition block and offset. 
+ */ template class SSDCacheDictionaryStorage final : public ICacheDictionaryStorage { @@ -806,9 +811,7 @@ public: explicit SSDCacheDictionaryStorage(const SSDCacheDictionaryStorageConfiguration & configuration_) : configuration(configuration_) , file_buffer(configuration_.file_path, configuration.block_size, configuration.file_blocks_size) - , read_from_file_buffer(configuration_.block_size * configuration_.read_buffer_blocks_size, 4096) , rnd_engine(randomSeed()) - , index(configuration.max_stored_keys, false, { complex_key_arena }) { memory_buffer_partitions.emplace_back(configuration.block_size, configuration.write_buffer_blocks_size); } @@ -897,14 +900,31 @@ public: size_t getSize() const override { return index.size(); } - size_t getMaxSize() const override {return index.getMaxSize(); } + double getLoadFactor() const override + { + size_t partitions_size = memory_buffer_partitions.size(); + + if (partitions_size == configuration.max_partitions_count) + return 1.0; + + auto & current_memory_partition = memory_buffer_partitions[current_partition_index]; + + size_t full_partitions = partitions_size - 1; + size_t blocks_in_memory = (full_partitions * configuration.write_buffer_blocks_size) + current_memory_partition.getCurrentBlockIndex(); + size_t blocks_on_disk = file_buffer.getCurrentBlockIndex(); + + size_t max_blocks_size = (configuration.file_blocks_size + configuration.write_buffer_blocks_size) * configuration.max_partitions_count; + + double load_factor = static_cast(blocks_in_memory + blocks_on_disk) / max_blocks_size; + return load_factor; + } size_t getBytesAllocated() const override { size_t memory_partitions_bytes_size = memory_buffer_partitions.size() * configuration.write_buffer_blocks_size * configuration.block_size; size_t file_partitions_bytes_size = memory_buffer_partitions.size() * configuration.file_blocks_size * configuration.block_size; - return index.getSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size; + return index.getBufferSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size; } private: @@ -920,8 +940,7 @@ private: default_value }; - TimePoint deadline; - + time_t deadline; SSDCacheIndex index; size_t in_memory_partition_index; CellState state; @@ -933,13 +952,12 @@ private: struct KeyToBlockOffset { - KeyToBlockOffset(size_t key_index_, size_t offset_in_block_, bool is_expired_) - : key_index(key_index_), offset_in_block(offset_in_block_), is_expired(is_expired_) + KeyToBlockOffset(size_t key_index_, size_t offset_in_block_) + : key_index(key_index_), offset_in_block(offset_in_block_) {} size_t key_index = 0; size_t offset_in_block = 0; - bool is_expired = false; }; template @@ -950,20 +968,24 @@ private: Result result; result.fetched_columns = fetch_request.makeAttributesResultColumns(); - result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found}); + result.key_index_to_state.resize_fill(keys.size()); - const auto now = std::chrono::system_clock::now(); + const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); size_t fetched_columns_index = 0; - using BlockIndexToKeysMap = std::unordered_map, DefaultHash>; + using BlockIndexToKeysMap = absl::flat_hash_map, DefaultHash>; BlockIndexToKeysMap block_to_keys_map; absl::flat_hash_set> unique_blocks_to_request; PaddedPODArray blocks_to_request; - std::chrono::seconds strict_max_lifetime_seconds(configuration.strict_max_lifetime_seconds); + time_t strict_max_lifetime_seconds = 
static_cast(configuration.strict_max_lifetime_seconds); size_t keys_size = keys.size(); + for (size_t attribute_size = 0; attribute_size < fetch_request.attributesSize(); ++attribute_size) + if (fetch_request.shouldFillResultColumnWithIndex(attribute_size)) + result.fetched_columns[attribute_size]->reserve(keys_size); + for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = keys[key_index]; @@ -978,9 +1000,7 @@ private: const auto & cell = it->getMapped(); - bool has_deadline = cellHasDeadline(cell); - - if (has_deadline && now > cell.deadline + strict_max_lifetime_seconds) + if (unlikely(now > cell.deadline + strict_max_lifetime_seconds)) { ++result.not_found_keys_size; continue; @@ -989,14 +1009,14 @@ private: bool cell_is_expired = false; KeyState::State key_state = KeyState::found; - if (has_deadline && now > cell.deadline) + if (now > cell.deadline) { cell_is_expired = true; key_state = KeyState::expired; } - result.expired_keys_size += cell_is_expired; - result.found_keys_size += !cell_is_expired; + result.expired_keys_size += static_cast(cell_is_expired); + result.found_keys_size += static_cast(!cell_is_expired); switch (cell.state) { @@ -1012,13 +1032,20 @@ private: } case Cell::on_disk: { - block_to_keys_map[cell.index.block_index].emplace_back(key_index, cell.index.offset_in_block, cell_is_expired); + PaddedPODArray & keys_block = block_to_keys_map[cell.index.block_index]; + keys_block.emplace_back(key_index, cell.index.offset_in_block); - if (!unique_blocks_to_request.contains(cell.index.block_index)) - { + KeyState::State state = cell_is_expired ? KeyState::expired : KeyState::found; + + /// Fetched column index will be set later during fetch blocks + result.key_index_to_state[key_index] = {state, 0}; + + auto insert_result = unique_blocks_to_request.insert(cell.index.block_index); + bool was_inserted = insert_result.second; + + if (was_inserted) blocks_to_request.emplace_back(cell.index.block_index); - unique_blocks_to_request.insert(cell.index.block_index); - } + break; } case Cell::default_value: @@ -1037,7 +1064,7 @@ private: /// Sort blocks by offset before start async io requests std::sort(blocks_to_request.begin(), blocks_to_request.end()); - file_buffer.fetchBlocks(read_from_file_buffer.m_data, configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data) + file_buffer.fetchBlocks(configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data) { auto & keys_in_block = block_to_keys_map[block_index]; @@ -1046,10 +1073,7 @@ private: char * key_data = block_data + key_in_block.offset_in_block; deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, key_data); - if (key_in_block.is_expired) - result.key_index_to_state[key_in_block.key_index] = {KeyState::expired, fetched_columns_index}; - else - result.key_index_to_state[key_in_block.key_index] = {KeyState::found, fetched_columns_index}; + result.key_index_to_state[key_in_block.key_index].setFetchedColumnIndex(fetched_columns_index); ++fetched_columns_index; } @@ -1087,7 +1111,7 @@ private: throw Exception("Serialized columns size is greater than allowed block size and metadata", ErrorCodes::UNSUPPORTED_METHOD); /// We cannot reuse place that is already allocated in file or memory cache so we erase key from index - index.erase(key); + eraseKeyFromIndex(key); Cell cell; setCellDeadline(cell, now); @@ -1114,8 +1138,7 @@ private: for (auto key : keys) { - /// We cannot reuse place that is already allocated in file or 
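The on-disk fetch path above reads every needed block exactly once: key positions are bucketed by block index, the block list is deduplicated and sorted so the AIO requests scan the file forward, and each callback writes results back per key offset. The planning step alone, with standard containers in place of the absl maps and PODArrays:

#include <algorithm>
#include <cstddef>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

struct KeyToBlockOffset { size_t key_index; size_t offset_in_block; };

/// Bucket keys by block, deduplicate the blocks, and order them for a forward scan.
void planBlockReads(
    const std::vector<std::pair<size_t, size_t>> & key_locations, /// (block_index, offset) per key.
    std::unordered_map<size_t, std::vector<KeyToBlockOffset>> & block_to_keys,
    std::vector<size_t> & blocks_to_request)
{
    std::unordered_set<size_t> unique_blocks;

    for (size_t key_index = 0; key_index < key_locations.size(); ++key_index)
    {
        const auto & [block_index, offset] = key_locations[key_index];
        block_to_keys[block_index].push_back({key_index, offset});

        if (unique_blocks.insert(block_index).second) /// First sighting of this block.
            blocks_to_request.push_back(block_index);
    }

    std::sort(blocks_to_request.begin(), blocks_to_request.end());
}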
memory cache so we erase key from index - index.erase(key); + eraseKeyFromIndex(key); Cell cell; @@ -1135,7 +1158,7 @@ private: key = updated_key; } - index.insert(key, cell); + index[key] = cell; } } @@ -1188,7 +1211,7 @@ private: cell.index = cache_index; cell.in_memory_partition_index = current_partition_index; - index.insert(ssd_cache_key.key, cell); + index[ssd_cache_key.key] = cell; break; } else @@ -1218,7 +1241,7 @@ private: if (old_key_cell.isOnDisk() && old_key_block >= block_index_in_file_before_write && old_key_block < file_read_end_block_index) - index.erase(old_key); + eraseKeyFromIndex(old_key); } } } @@ -1271,7 +1294,7 @@ private: cell.index = cache_index; cell.in_memory_partition_index = current_partition_index; - index.insert(ssd_cache_key.key, cell); + index[ssd_cache_key.key] = cell; break; } else @@ -1296,16 +1319,12 @@ private: } } - inline static bool cellHasDeadline(const Cell & cell) - { - return cell.deadline != std::chrono::system_clock::from_time_t(0); - } - inline void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { - cell.deadline = std::chrono::system_clock::from_time_t(0); + auto deadline = std::chrono::time_point::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds); + cell.deadline = std::chrono::system_clock::to_time_t(deadline); return; } @@ -1313,47 +1332,45 @@ private: size_t max_sec_lifetime = configuration.lifetime.max_sec; std::uniform_int_distribution distribution{min_sec_lifetime, max_sec_lifetime}; - cell.deadline = now + std::chrono::seconds{distribution(rnd_engine)}; + auto deadline = now + std::chrono::seconds(distribution(rnd_engine)); + cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - template - friend class ArenaCellKeyDisposer; + inline void eraseKeyFromIndex(KeyType key) + { + auto it = index.find(key); + + if (it == nullptr) + return; + + /// In case of complex key in arena key is serialized from hash table + KeyType key_copy = it->getKey(); + + index.erase(key); + + if constexpr (std::is_same_v) + complex_key_arena.free(const_cast(key_copy.data), key_copy.size); + } SSDCacheDictionaryStorageConfiguration configuration; SSDCacheFileBuffer file_buffer; - Memory> read_from_file_buffer; - std::vector> memory_buffer_partitions; pcg64 rnd_engine; - class ArenaCellKeyDisposer - { - public: - ArenaWithFreeLists & arena; + using SimpleKeyHashMap = HashMap; + using ComplexKeyHashMap = HashMapWithSavedHash; - template - void operator()(const Key & key, const Value &) const - { - /// In case of complex key we keep it in arena - if constexpr (std::is_same_v) - arena.free(const_cast(key.data), key.size); - } - }; - - using SimpleKeyLRUHashMap = LRUHashMap; - using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash; - - using CacheLRUHashMap = std::conditional_t< + using CacheMap = std::conditional_t< dictionary_key_type == DictionaryKeyType::simple, - SimpleKeyLRUHashMap, - ComplexKeyLRUHashMap>; + SimpleKeyHashMap, + ComplexKeyHashMap>; ArenaWithFreeLists complex_key_arena; - CacheLRUHashMap index; + CacheMap index; size_t current_partition_index = 0; diff --git a/src/Dictionaries/benchmark b/src/Dictionaries/benchmark deleted file mode 100644 index 37d0d92ac14..00000000000 --- a/src/Dictionaries/benchmark +++ /dev/null @@ -1,154 +0,0 @@ -clickhouse-client --query="DROP TABLE IF EXISTS simple_cache_dictionary_table_source"; -clickhouse-client --query="CREATE TABLE simple_cache_dictionary_table_source (id UInt64, value1 String, value2 
UInt64, value3 String, value4 Float64, value5 Decimal64(4)) ENGINE=TinyLog;" -clickhouse-client --query="INSERT INTO simple_cache_dictionary_table_source SELECT number, concat('Value1 ', toString(number)), number, concat('Value3 ', toString(number)), toFloat64(number), cast(number, 'Decimal64(4)') FROM system.numbers LIMIT 1000000;" - -clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_simple_cache_dictionary ( - id UInt64, - value1 String, - value2 UInt64, - value3 String, - value4 Float64, - value5 Decimal64(4) -) -PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default')) -LIFETIME(MIN 300 MAX 300) -LAYOUT(CACHE(SIZE_IN_CELLS 100000));" - -clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_ssd_simple_cache_dictionary ( - id UInt64, - value1 String, - value2 UInt64, - value3 String, - value4 Float64, - value5 Decimal64(4) -) -PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default')) -LIFETIME(MIN 300 MAX 300) -LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 WRITE_BUFFER_SIZE 327680 MAX_STORED_KEYS 1048576 PATH '/opt/mkita/ClickHouse/build_release/programs/ssd_cache'));" - -clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_dummy_simple_cache_dictionary ( - id UInt64, - value1 String, - value2 UInt64, - value3 String, - value4 Float64, - value5 Decimal64(4) -) -PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default')) -LIFETIME(MIN 300 MAX 300) -LAYOUT(DUMMY_SIMPLE());" - -./clickhouse-benchmark --query="SELECT - dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value1', number), - dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value2', number), - dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value3', number), - dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value4', number), - dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value5', number) -FROM system.numbers -LIMIT 10000 -FORMAT Null" - -./clickhouse-benchmark --query="SELECT - dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) -FROM system.numbers -LIMIT 10000 -FORMAT Null" - -./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null" - -./clickhouse-benchmark --query="SELECT - dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number) -FROM system.numbers -LIMIT 10000 -FORMAT Null" - -./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null" - -SELECT - dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number), - dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value2', number), - dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value3', number), - 
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value4', number), - dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value5', number) -FROM system.numbers - LIMIT 10000 -FORMAT Null - -SELECT dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000 FORMAT Null - -SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000 -FORMAT Null - -SELECT - dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) -FROM system.numbers - LIMIT 10000 -FORMAT - Null - -SELECT - dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number) -FROM system.numbers - LIMIT 10000 -FORMAT - Null - -SELECT - dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number), - dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number) -FROM system.numbers -LIMIT 10000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value1', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value2', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value3', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value4', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value5', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT - dictGet('clickhouse_simple_cache_dictionary', 'value1', number), - dictGet('clickhouse_simple_cache_dictionary', 'value2', number), - dictGet('clickhouse_simple_cache_dictionary', 'value3', number), - dictGet('clickhouse_simple_cache_dictionary', 'value4', number), - dictGet('clickhouse_simple_cache_dictionary', 'value5', number) -FROM system.numbers -LIMIT 100000 -FORMAT Null - -SELECT * FROM clickhouse_simple_cache_dictionary_table; \ No newline at end of file diff --git a/src/Dictionaries/registerCacheDictionaries.cpp b/src/Dictionaries/registerCacheDictionaries.cpp index 92e6eb97b63..b93a08acb76 100644 --- a/src/Dictionaries/registerCacheDictionaries.cpp +++ b/src/Dictionaries/registerCacheDictionaries.cpp @@ -1,6 +1,6 @@ #include "CacheDictionary.h" -#include "SSDCacheDictionaryStorage.h" #include "CacheDictionaryStorage.h" +#include "SSDCacheDictionaryStorage.h" #include namespace DB @@ -20,13 +20,13 @@ CacheDictionaryStorageConfiguration parseCacheStorageConfiguration( const DictionaryLifetime & dict_lifetime, DictionaryKeyType dictionary_key_type) { - String dictionary_type_prefix = dictionary_key_type == DictionaryKeyType::complex ? ".complex_key_cache." : ".cache."; + String dictionary_type_prefix = (dictionary_key_type == DictionaryKeyType::complex) ? ".complex_key_cache." 
: ".cache."; String dictionary_configuration_prefix = layout_prefix + dictionary_type_prefix; const size_t size = config.getUInt64(dictionary_configuration_prefix + "size_in_cells"); if (size == 0) throw Exception(ErrorCodes::TOO_SMALL_BUFFER_SIZE, - "({}: cache dictionary cannot have 0 cells", + "({}): cache dictionary cannot have 0 cells", full_name); size_t dict_lifetime_seconds = static_cast(dict_lifetime.max_sec); @@ -59,7 +59,6 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration( static constexpr size_t DEFAULT_READ_BUFFER_SIZE_BYTES = 16 * DEFAULT_SSD_BLOCK_SIZE_BYTES; static constexpr size_t DEFAULT_WRITE_BUFFER_SIZE_BYTES = DEFAULT_SSD_BLOCK_SIZE_BYTES; - static constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000; static constexpr size_t DEFAULT_PARTITIONS_COUNT = 16; const size_t max_partitions_count @@ -94,16 +93,11 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration( if (directory_path.at(0) != '/') directory_path = std::filesystem::path{config.getString("path")}.concat(directory_path).string(); - const size_t max_stored_keys_in_partition - = config.getInt64(dictionary_configuration_prefix + "max_stored_keys", DEFAULT_MAX_STORED_KEYS); - const size_t rounded_size = roundUpToPowerOfTwoOrZero(max_stored_keys_in_partition); - SSDCacheDictionaryStorageConfiguration configuration{ strict_max_lifetime_seconds, dict_lifetime, directory_path, max_partitions_count, - rounded_size, block_size, file_size / block_size, read_buffer_size / block_size, @@ -194,7 +188,8 @@ DictionaryPtr createCacheDictionaryLayout( const bool allow_read_expired_keys = config.getBool(layout_prefix + ".cache.allow_read_expired_keys", false); auto storage_configuration = parseCacheStorageConfiguration(full_name, config, layout_prefix, dict_lifetime, dictionary_key_type); - auto storage = std::make_shared>(storage_configuration); + + std::shared_ptr storage = std::make_shared>(dict_struct, storage_configuration); auto update_queue_configuration = parseCacheDictionaryUpdateQueueConfiguration(full_name, config, layout_prefix, dictionary_key_type); diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 132e94907f5..8af4a27ecc9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -538,8 +538,9 @@ private: [[maybe_unused]] const auto block_size = static_cast(EVP_CIPHER_block_size(evp_cipher)); [[maybe_unused]] const auto iv_size = static_cast(EVP_CIPHER_iv_length(evp_cipher)); - const auto key_size = static_cast(EVP_CIPHER_key_length(evp_cipher)); - const auto tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1 + + const size_t key_size = static_cast(EVP_CIPHER_key_length(evp_cipher)); + static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1 auto decrypted_result_column = ColumnString::create(); auto & decrypted_result_column_data = decrypted_result_column->getChars(); @@ -549,9 +550,17 @@ private: size_t resulting_size = 0; for (size_t r = 0; r < input_rows_count; ++r) { - resulting_size += input_column->getDataAt(r).size + 1; + size_t string_size = input_column->getDataAt(r).size; + resulting_size += string_size + 1; /// With terminating zero. 
+ if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) + { + if (string_size < tag_size) + throw Exception("Encrypted data is smaller than the size of additional data for AEAD mode, cannot decrypt.", + ErrorCodes::BAD_ARGUMENTS); + resulting_size -= tag_size; + } } #if defined(MEMORY_SANITIZER) @@ -565,6 +574,7 @@ private: decrypted_result_column_data.resize(resulting_size); #endif } + auto * decrypted = decrypted_result_column_data.data(); KeyHolder key_holder; @@ -631,7 +641,7 @@ private: // 1.a.2: Set AAD if present if (aad_column) { - const auto aad_data = aad_column->getDataAt(r); + StringRef aad_data = aad_column->getDataAt(r); int tmp_len = 0; if (aad_data.size != 0 && EVP_DecryptUpdate(evp_ctx, nullptr, &tmp_len, reinterpret_cast(aad_data.data), aad_data.size) != 1) diff --git a/src/Functions/SimdJSONParser.h b/src/Functions/SimdJSONParser.h index a9adfa27e2c..7ff3c45130d 100644 --- a/src/Functions/SimdJSONParser.h +++ b/src/Functions/SimdJSONParser.h @@ -42,11 +42,11 @@ struct SimdJSONParser ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; } ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; } - ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; } - ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; } - ALWAYS_INLINE double getDouble() const { return element.get_double().first; } - ALWAYS_INLINE bool getBool() const { return element.get_bool().first; } - ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; } + ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); } + ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); } + ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); } + ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); } + ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); } ALWAYS_INLINE Array getArray() const; ALWAYS_INLINE Object getObject() const; @@ -75,7 +75,7 @@ struct SimdJSONParser ALWAYS_INLINE Iterator begin() const { return array.begin(); } ALWAYS_INLINE Iterator end() const { return array.end(); } ALWAYS_INLINE size_t size() const { return array.size(); } - ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; } + ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); } private: simdjson::dom::array array; @@ -111,7 +111,7 @@ struct SimdJSONParser if (x.error()) return false; - result = x.first; + result = x.value_unsafe(); return true; } @@ -137,7 +137,7 @@ struct SimdJSONParser if (document.error()) return false; - result = document.first; + result = document.value_unsafe(); return true; } @@ -155,12 +155,12 @@ private: inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { - return element.get_array().first; + return element.get_array().value_unsafe(); } inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { - return element.get_object().first; + return element.get_object().value_unsafe(); } } diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index c13b5f50156..974574058e9 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ 
b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -90,7 +90,70 @@ struct ExtractFirstSignificantSubdomain res_data += last_3_periods[1] + 1 - begin; res_size = last_3_periods[0] - last_3_periods[1] - 1; } - } + } + + /// The difference with execute() is due to custom TLD list can have records of any level, + /// not only 2-nd level (like non-custom variant), so it requires more lookups. + template + static void executeCustom(const Lookup & lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr) + { + res_data = data; + res_size = 0; + + Pos tmp; + size_t domain_length; + ExtractDomain::execute(data, size, tmp, domain_length); + + if (domain_length == 0) + return; + + if (out_domain_end) + *out_domain_end = tmp + domain_length; + + /// cut useless dot + if (tmp[domain_length - 1] == '.') + --domain_length; + + res_data = tmp; + res_size = domain_length; + + auto begin = tmp; + auto end = begin + domain_length; + const char * last_2_periods[2]{}; + const char * prev = begin - 1; + + auto pos = find_first_symbols<'.'>(begin, end); + while (pos < end) + { + if (lookup(pos + 1, end - pos - 1)) + { + res_data += prev + 1 - begin; + res_size = end - 1 - prev; + return; + } + + last_2_periods[1] = last_2_periods[0]; + last_2_periods[0] = pos; + prev = pos; + pos = find_first_symbols<'.'>(pos + 1, end); + } + + /// if there is domain of the first level (i.e. no dots in the hostname) -> return nothing + if (!last_2_periods[0]) + return; + + /// if there is domain of the second level -> always return itself + if (!last_2_periods[1]) + { + res_size = last_2_periods[0] - begin; + return; + } + + /// if there is domain of the 3+ level, and zero records in TLD list -> + /// fallback to domain of the second level + res_data += last_2_periods[1] + 1 - begin; + res_size = last_2_periods[0] - last_2_periods[1] - 1; + } }; } diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 244b32459c1..d6868834f75 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -17,10 +17,10 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -struct FirstSignificantSubdomainCustomtLookup +struct FirstSignificantSubdomainCustomLookup { const TLDList & tld_list; - FirstSignificantSubdomainCustomtLookup(const std::string & tld_list_name) + FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name) : tld_list(TLDListsHolder::getInstance().getTldList(tld_list_name)) { } @@ -63,7 +63,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override { const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); - FirstSignificantSubdomainCustomtLookup tld_lookup(column_tld_list_name->getValue()); + FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); /// FIXME: convertToFullColumnIfConst() is suboptimal auto column = arguments[0].column->convertToFullColumnIfConst(); @@ -79,7 +79,7 @@ public: ErrorCodes::ILLEGAL_COLUMN); } - static void vector(FirstSignificantSubdomainCustomtLookup & tld_lookup, + static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { diff --git 
a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp index 11fd27e317b..7532ddd00f2 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp @@ -10,7 +10,7 @@ struct CutToFirstSignificantSubdomainCustom { static size_t getReserveLengthForElement() { return 15; } - static void execute(FirstSignificantSubdomainCustomtLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size) + static void execute(FirstSignificantSubdomainCustomLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size) { res_data = data; res_size = 0; @@ -18,7 +18,7 @@ struct CutToFirstSignificantSubdomainCustom Pos tmp_data; size_t tmp_length; Pos domain_end; - ExtractFirstSignificantSubdomain::execute(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); + ExtractFirstSignificantSubdomain::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); if (tmp_length == 0) return; diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 2050e0c28ab..c025117af69 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -190,7 +190,7 @@ private: } static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30; - if (static_cast(max_key - min_key) > MAX_ARRAY_SIZE) + if (static_cast(max_key) - static_cast(min_key) > MAX_ARRAY_SIZE) throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName()); /* fill the result arrays */ diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index 7364311a1be..6f5298a8c5e 100644 --- a/src/Functions/bar.cpp +++ b/src/Functions/bar.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; } namespace @@ -110,6 +111,9 @@ public: arguments[2].column->getFloat64(i), max_width); + if (!isFinite(width)) + throw Exception("Value of width must not be NaN or Inf", ErrorCodes::BAD_ARGUMENTS); + size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset])); diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 592f0d6774d..ca9bc32486e 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -41,7 +41,8 @@ void registerFunctionThrowIf(FunctionFactory &); void registerFunctionVersion(FunctionFactory &); void registerFunctionBuildId(FunctionFactory &); void registerFunctionUptime(FunctionFactory &); -void registerFunctionTimeZone(FunctionFactory &); +void registerFunctionTimezone(FunctionFactory &); +void registerFunctionTimezoneOf(FunctionFactory &); void registerFunctionRunningAccumulate(FunctionFactory &); void registerFunctionRunningDifference(FunctionFactory &); void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &); @@ -111,7 +112,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) { registerFunctionVersion(factory); registerFunctionBuildId(factory); registerFunctionUptime(factory); - registerFunctionTimeZone(factory); + registerFunctionTimezone(factory); + registerFunctionTimezoneOf(factory);
registerFunctionRunningAccumulate(factory); registerFunctionRunningDifference(factory); registerFunctionRunningDifferenceStartingWithFirstValue(factory); diff --git a/src/Functions/timezone.cpp b/src/Functions/timezone.cpp index 4522f21c8b2..2cd0c28612b 100644 --- a/src/Functions/timezone.cpp +++ b/src/Functions/timezone.cpp @@ -12,13 +12,13 @@ namespace /** Returns the server time zone. */ -class FunctionTimeZone : public IFunction +class FunctionTimezone : public IFunction { public: static constexpr auto name = "timezone"; static FunctionPtr create(const Context &) { - return std::make_shared(); + return std::make_shared(); } String getName() const override @@ -45,9 +45,10 @@ public: } -void registerFunctionTimeZone(FunctionFactory & factory) +void registerFunctionTimezone(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("timeZone", "timezone"); } } diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp new file mode 100644 index 00000000000..1d007a6e10e --- /dev/null +++ b/src/Functions/timezoneOf.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + +namespace +{ + + +/** timezoneOf(x) - get the name of the timezone of DateTime data type. + * Example: Europe/Moscow. + */ +class ExecutableFunctionTimezoneOf : public IExecutableFunctionImpl +{ +public: + static constexpr auto name = "timezoneOf"; + String getName() const override { return name; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + /// Execute the function on the columns. 
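+ /// The result depends only on the argument type, never on the row values,
+ /// so it is materialized as a constant String column of the requested height.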
+ ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + DataTypePtr type_no_nullable = removeNullable(arguments[0].type); + + return DataTypeString().createColumnConst(input_rows_count, + dynamic_cast(*type_no_nullable).getTimeZone().getTimeZone()); + } +}; + + +class BaseFunctionTimezoneOf : public IFunctionBaseImpl +{ +public: + BaseFunctionTimezoneOf(DataTypes argument_types_, DataTypePtr return_type_) + : argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {} + + static constexpr auto name = "timezoneOf"; + String getName() const override { return name; } + + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + const DataTypePtr & getResultType() const override { return return_type; } + + ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(); + } + + ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & arguments) const override + { + DataTypePtr type_no_nullable = removeNullable(arguments[0].type); + + return DataTypeString().createColumnConst(1, + dynamic_cast(*type_no_nullable).getTimeZone().getTimeZone()); + } + +private: + DataTypes argument_types; + DataTypePtr return_type; +}; + + +class FunctionTimezoneOfBuilder : public IFunctionOverloadResolverImpl +{ +public: + static constexpr auto name = "timezoneOf"; + String getName() const override { return name; } + static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique(); } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnType(const DataTypes & types) const override + { + DataTypePtr type_no_nullable = removeNullable(types[0]); + + if (isDateTime(type_no_nullable) || isDateTime64(type_no_nullable)) + return std::make_shared(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad argument for function {}, should be DateTime or DateTime64", name); + } + + FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + return std::make_unique(DataTypes{arguments[0].type}, return_type); + } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } +}; + +} + +void registerFunctionTimezoneOf(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerAlias("timeZoneOf", "timezoneOf"); +} + +} + diff --git a/src/Functions/toTimeZone.cpp b/src/Functions/toTimezone.cpp similarity index 90% rename from src/Functions/toTimeZone.cpp rename to src/Functions/toTimezone.cpp index fbf3a0778a6..d12f926b284 100644 --- a/src/Functions/toTimeZone.cpp +++ b/src/Functions/toTimezone.cpp @@ -21,11 +21,11 @@ namespace { /// Just changes time zone information for data type. The calculation is free. 
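/// For example, toTimezone(now(), 'UTC') merely retags the value as DateTime('UTC');
/// the stored Unix timestamp is not touched, so no per-row computation is needed.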
-class FunctionToTimeZone : public IFunction +class FunctionToTimezone : public IFunction { public: - static constexpr auto name = "toTimeZone"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static constexpr auto name = "toTimezone"; + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { @@ -64,7 +64,8 @@ public: void registerFunctionToTimeZone(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("toTimeZone", "toTimezone"); } } diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 3ac64828b9c..aed2bd9b70d 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -467,6 +467,7 @@ SRCS( timeSlot.cpp timeSlots.cpp timezone.cpp + timezoneOf.cpp timezoneOffset.cpp toColumnTypeName.cpp toCustomWeek.cpp @@ -506,7 +507,7 @@ SRCS( toStartOfTenMinutes.cpp toStartOfYear.cpp toTime.cpp - toTimeZone.cpp + toTimezone.cpp toTypeName.cpp toUnixTimestamp64Micro.cpp toUnixTimestamp64Milli.cpp diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp index e87eeb1a2be..512ed5fc93f 100644 --- a/src/IO/BrotliWriteBuffer.cpp +++ b/src/IO/BrotliWriteBuffer.cpp @@ -106,7 +106,7 @@ void BrotliWriteBuffer::finish() try { finishImpl(); - out->next(); + out->finalize(); finished = true; } catch (...) diff --git a/src/IO/LZMADeflatingWriteBuffer.cpp b/src/IO/LZMADeflatingWriteBuffer.cpp index 96f1d34b01b..7ea4f7945dc 100644 --- a/src/IO/LZMADeflatingWriteBuffer.cpp +++ b/src/IO/LZMADeflatingWriteBuffer.cpp @@ -105,7 +105,7 @@ void LZMADeflatingWriteBuffer::finish() try { finishImpl(); - out->next(); + out->finalize(); finished = true; } catch (...) diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index 1d999d586b2..15fdd9448ec 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -82,6 +82,7 @@ bool PeekableReadBuffer::peekNext() checkpoint.emplace(memory.data()); checkpoint_in_own_memory = true; } + if (currentlyReadFromOwnMemory()) { /// Update buffer size @@ -99,7 +100,6 @@ bool PeekableReadBuffer::peekNext() pos_offset = 0; } BufferBase::set(memory.data(), peeked_size + bytes_to_copy, pos_offset); - } peeked_size += bytes_to_copy; @@ -113,12 +113,21 @@ void PeekableReadBuffer::rollbackToCheckpoint(bool drop) { checkStateCorrect(); - if (!checkpoint) - throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR); - else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + assert(checkpoint); + + if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + { + /// Both checkpoint and position are in the same buffer. pos = *checkpoint; - else /// Checkpoint is in own memory and pos is not. Switch to reading from own memory + } + else + { + /// Checkpoint is in own memory and position is not. + assert(checkpointInOwnMemory()); + + /// Switch to reading from own memory. 
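+ /// peekNext() copied the checkpointed bytes into own memory, so the position
+ /// is restored relative to memory.data() rather than to the sub-buffer.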
BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data()); + } if (drop) dropCheckpoint(); @@ -134,6 +143,7 @@ bool PeekableReadBuffer::nextImpl() checkStateCorrect(); bool res; + bool checkpoint_at_end = checkpoint && *checkpoint == working_buffer.end() && currentlyReadFromOwnMemory(); if (checkpoint) { @@ -163,6 +173,13 @@ bool PeekableReadBuffer::nextImpl() BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); nextimpl_working_buffer_offset = sub_buf.offset(); + if (checkpoint_at_end) + { + checkpoint.emplace(working_buffer.begin()); + peeked_size = 0; + checkpoint_in_own_memory = false; + } + checkStateCorrect(); return res; } diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 4f6e669b31d..4515c6f8ce5 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -43,10 +43,7 @@ public: /// Forget checkpoint and all data between checkpoint and position ALWAYS_INLINE inline void dropCheckpoint() { -#ifndef NDEBUG - if (!checkpoint) - throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR); -#endif + assert(checkpoint); if (!currentlyReadFromOwnMemory()) { /// Don't need to store unread data anymore diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 37896a387bb..c70993c5c3a 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -27,23 +28,23 @@ bool ReadBufferFromPocoSocket::nextImpl() ssize_t bytes_read = 0; Stopwatch watch; - int flags = 0; - if (async_callback) - flags |= MSG_DONTWAIT; - /// Add more details to exceptions. try { - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags); - - /// If async_callback is specified, and read is blocking, run async_callback and try again later. + /// If async_callback is specified, and read will block, run async_callback and try again later. /// It is expected that file descriptor may be polled externally. /// Note that receive timeout is not checked here. External code should check it while polling. - while (bytes_read < 0 && async_callback && errno == EAGAIN) - { + while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ)) async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description); - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags); - } + + /// receiveBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout), + /// but we want to get this exception exactly after receive_timeout. So, set send_timeout = receive_timeout + /// before receiveBytes. 
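+ /// Assuming TimeoutSetter acts as an RAII guard (saving the old timeouts and
+ /// restoring them in its destructor), the original send timeout comes back as
+ /// soon as the unique_ptr below goes out of scope.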
+ std::unique_ptr timeout_setter = nullptr; + if (socket.secure()) + timeout_setter = std::make_unique(dynamic_cast(socket), socket.getReceiveTimeout(), socket.getReceiveTimeout()); + + bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size()); } catch (const Poco::Net::NetException & e) { diff --git a/src/Client/TimeoutSetter.cpp b/src/IO/TimeoutSetter.cpp similarity index 97% rename from src/Client/TimeoutSetter.cpp rename to src/IO/TimeoutSetter.cpp index 87368f93ba3..f06cafecff8 100644 --- a/src/Client/TimeoutSetter.cpp +++ b/src/IO/TimeoutSetter.cpp @@ -1,4 +1,4 @@ -#include "TimeoutSetter.h" +#include #include diff --git a/src/Client/TimeoutSetter.h b/src/IO/TimeoutSetter.h similarity index 100% rename from src/Client/TimeoutSetter.h rename to src/IO/TimeoutSetter.h diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 284fa5dbd97..4edfc8a2795 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -40,6 +41,13 @@ void WriteBufferFromPocoSocket::nextImpl() /// Add more details to exceptions. try { + /// sendBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout), + /// but we want to get this exception exactly after send_timeout. So, set receive_timeout = send_timeout + /// before sendBytes. + std::unique_ptr timeout_setter = nullptr; + if (socket.secure()) + timeout_setter = std::make_unique(dynamic_cast(socket), socket.getSendTimeout(), socket.getSendTimeout()); + res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written); } catch (const Poco::Net::NetException & e) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 7373b24991a..93aaf9456b5 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -120,7 +120,7 @@ WriteBufferFromS3::~WriteBufferFromS3() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp index 5da82b52279..7e91820f298 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -107,7 +107,7 @@ void ZlibDeflatingWriteBuffer::finish() try { finishImpl(); - out->next(); + out->finalize(); finished = true; } catch (...) diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index 27694797db6..5b97588b33e 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -94,7 +94,7 @@ void ZstdDeflatingWriteBuffer::finish() try { finishImpl(); - out->next(); + out->finalize(); finished = true; } catch (...) 
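The same one-line fix recurs in BrotliWriteBuffer, LZMADeflatingWriteBuffer, ZlibDeflatingWriteBuffer, and ZstdDeflatingWriteBuffer: after the codec trailer is written, the downstream buffer must be finalized rather than merely flushed, since a plain next() can leave the tail of the stream sitting in the downstream writer. A minimal sketch of the shared shape, with an illustrative class name rather than the exact ClickHouse code:

void DeflatingWriteBufferSketch::finish()
{
    if (finished)
        return;

    finishImpl();     /// Write the codec's trailer bytes into `out`.
    out->finalize();  /// Complete the downstream buffer; next() would only flush the current chunk.
    finished = true;
}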
diff --git a/src/IO/tests/gtest_peekable_read_buffer.cpp b/src/IO/tests/gtest_peekable_read_buffer.cpp index 8c491338bd3..2e5ca47c0aa 100644 --- a/src/IO/tests/gtest_peekable_read_buffer.cpp +++ b/src/IO/tests/gtest_peekable_read_buffer.cpp @@ -6,11 +6,6 @@ #include #include -namespace DB::ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - static void readAndAssert(DB::ReadBuffer & buf, const char * str) { size_t n = strlen(str); @@ -48,20 +43,6 @@ try readAndAssert(peekable, "01234"); } -#ifndef ABORT_ON_LOGICAL_ERROR - bool exception = false; - try - { - peekable.rollbackToCheckpoint(); - } - catch (DB::Exception & e) - { - if (e.code() != DB::ErrorCodes::LOGICAL_ERROR) - throw; - exception = true; - } - ASSERT_TRUE(exception); -#endif assertAvailable(peekable, "56789"); readAndAssert(peekable, "56"); diff --git a/src/IO/ya.make b/src/IO/ya.make index 6605cf64277..58df027c561 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -50,6 +50,7 @@ SRCS( ReadBufferFromPocoSocket.cpp ReadHelpers.cpp SeekAvoidingReadBuffer.cpp + TimeoutSetter.cpp UseSSL.cpp WriteBufferFromFile.cpp WriteBufferFromFileBase.cpp diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 73257ba5185..853fe296d1c 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -818,13 +818,10 @@ private: if (!min_id) min_id = getMinIDToFinishLoading(forced_to_reload); - if (info->state_id >= min_id) - return true; /// stop - if (info->loading_id < min_id) startLoading(*info, forced_to_reload, *min_id); - /// Wait for the next event if loading wasn't completed, and stop otherwise. + /// Wait for the next event if loading wasn't completed, or stop otherwise. return (info->state_id >= min_id); }; @@ -850,9 +847,6 @@ private: if (filter && !filter(name)) continue; - if (info.state_id >= min_id) - continue; - if (info.loading_id < min_id) startLoading(info, forced_to_reload, *min_id); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d1af86e7b11..f8bcbf02ab4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -260,7 +260,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) renamed = true; } - database->loadStoredObjects(context, has_force_restore_data_flag, create.attach && force_attach); + /// We use global context here, because storages lifetime is bigger than query context lifetime + database->loadStoredObjects(context.getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach); } catch (...) 
{ @@ -970,7 +971,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, if (create.as_table_function) { const auto & factory = TableFunctionFactory::instance(); - res = factory.get(create.as_table_function, context)->execute(create.as_table_function, context, create.table, properties.columns); + auto table_func = factory.get(create.as_table_function, context); + res = table_func->execute(create.as_table_function, context, create.table, properties.columns); res->renameInMemory({create.database, create.table, create.uuid}); } else diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b4f64528471..1f6b0c37437 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -393,7 +393,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && query.where() && !query.prewhere() && !query.final()) + if (try_move_to_prewhere && storage && query.where() && !query.prewhere()) { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index b4768c30f32..b0c95ed3835 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -9,8 +9,6 @@ #include #include -#include - #include #include #include diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index e922f49c896..a97ef41204a 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -60,7 +61,7 @@ void WindowFrame::toString(WriteBuffer & buf) const } else { - buf << abs(begin_offset); + buf << applyVisitor(FieldVisitorToString(), begin_offset); buf << " " << (begin_preceding ? "PRECEDING" : "FOLLOWING"); } @@ -77,7 +78,7 @@ void WindowFrame::toString(WriteBuffer & buf) const } else { - buf << abs(end_offset); + buf << applyVisitor(FieldVisitorToString(), end_offset); buf << " " << (end_preceding ? "PRECEDING" : "FOLLOWING"); } @@ -121,23 +122,33 @@ void WindowFrame::checkValid() const if (end_type == BoundaryType::Offset && begin_type == BoundaryType::Offset) { - // Frame starting with following rows can't have preceding rows. - if (!(end_preceding && !begin_preceding)) + // Frame start offset must be less or equal that the frame end offset. + bool begin_less_equal_end; + if (begin_preceding && end_preceding) { - // Frame start offset must be less or equal that the frame end offset. - const bool begin_before_end - = begin_offset * (begin_preceding ? -1 : 1) - <= end_offset * (end_preceding ? -1 : 1); - - if (!begin_before_end) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset {} {} does not precede the frame end offset {} {}", - begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING", - end_offset, end_preceding ? 
"PRECEDING" : "FOLLOWING"); - } - return; + begin_less_equal_end = begin_offset >= end_offset; } + else if (begin_preceding && !end_preceding) + { + begin_less_equal_end = true; + } + else if (!begin_preceding && end_preceding) + { + begin_less_equal_end = false; + } + else /* if (!begin_preceding && !end_preceding) */ + { + begin_less_equal_end = begin_offset <= end_offset; + } + + if (!begin_less_equal_end) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset {} {} does not precede the frame end offset {} {}", + begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING", + end_offset, end_preceding ? "PRECEDING" : "FOLLOWING"); + } + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index faad4649f91..70a4e0e44e0 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -44,14 +44,13 @@ struct WindowFrame // Offset might be both preceding and following, controlled by begin_preceding, // but the offset value must be positive. BoundaryType begin_type = BoundaryType::Unbounded; - // This should have been a Field but I'm getting some crazy linker errors. - int64_t begin_offset = 0; + Field begin_offset = 0; bool begin_preceding = true; // Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding // must be false. BoundaryType end_type = BoundaryType::Current; - int64_t end_offset = 0; + Field end_offset = 0; bool end_preceding = false; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index d47f64cb1dc..5d124add0df 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -377,6 +377,11 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co else if (const auto * nullable_type = typeid_cast(&to_type)) { const IDataType & nested_type = *nullable_type->getNestedType(); + + /// NULL remains NULL after any conversion. + if (WhichDataType(nested_type).isNothing()) + return {}; + if (from_type_hint && from_type_hint->equals(nested_type)) return from_value; return convertFieldToTypeImpl(from_value, nested_type, from_type_hint); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index aa5508bf190..4715c7f201b 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -137,8 +137,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (window()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << - "WINDOW " << (s.hilite ? hilite_none : ""); - window()->formatImpl(s, state, frame); + "WINDOW" << (s.hilite ? 
hilite_none : ""); + window()->as().formatImplMultiline(s, state, frame); } if (orderBy()) diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp index aee951fc1f3..35374df6177 100644 --- a/src/Parsers/ASTWindowDefinition.cpp +++ b/src/Parsers/ASTWindowDefinition.cpp @@ -35,6 +35,8 @@ String ASTWindowDefinition::getID(char) const void ASTWindowDefinition::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked format_frame) const { + format_frame.expression_list_prepend_whitespace = false; + if (partition_by) { settings.ostr << "PARTITION BY "; @@ -70,7 +72,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings, } else { - settings.ostr << abs(frame.begin_offset); + settings.ostr << applyVisitor(FieldVisitorToString(), + frame.begin_offset); settings.ostr << " " << (!frame.begin_preceding ? "FOLLOWING" : "PRECEDING"); } @@ -85,7 +88,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings, } else { - settings.ostr << abs(frame.end_offset); + settings.ostr << applyVisitor(FieldVisitorToString(), + frame.end_offset); settings.ostr << " " << (!frame.end_preceding ? "FOLLOWING" : "PRECEDING"); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a54573432a1..913813d5486 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -581,30 +581,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if (!isInt64FieldType(value.getType())) + if ((node->frame.type == WindowFrame::FrameType::Rows + || node->frame.type == WindowFrame::FrameType::Groups) + && !(value.getType() == Field::Types::UInt64 + || (value.getType() == Field::Types::Int64 + && value.get() >= 0))) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Only integer frame offsets are supported, '{}' is not supported.", + "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", + WindowFrame::toString(node->frame.type), + applyVisitor(FieldVisitorToString(), value), Field::Types::toString(value.getType())); } - node->frame.begin_offset = value.get(); + node->frame.begin_offset = value; node->frame.begin_type = WindowFrame::BoundaryType::Offset; - // We can easily get a UINT64_MAX here, which doesn't even fit into - // int64_t. Not sure what checks we are going to need here after we - // support floats and dates. 
- if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset must be between {} and {}, but {} is given", - INT_MAX, INT_MIN, node->frame.begin_offset); - } - - if (node->frame.begin_offset < 0) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset must be greater than zero, {} given", - node->frame.begin_offset); - } } else { @@ -652,28 +642,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if (!isInt64FieldType(value.getType())) + if ((node->frame.type == WindowFrame::FrameType::Rows + || node->frame.type == WindowFrame::FrameType::Groups) + && !(value.getType() == Field::Types::UInt64 + || (value.getType() == Field::Types::Int64 + && value.get() >= 0))) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Only integer frame offsets are supported, '{}' is not supported.", + "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", + WindowFrame::toString(node->frame.type), + applyVisitor(FieldVisitorToString(), value), Field::Types::toString(value.getType())); } - node->frame.end_offset = value.get(); + node->frame.end_offset = value; node->frame.end_type = WindowFrame::BoundaryType::Offset; - - if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset must be between {} and {}, but {} is given", - INT_MAX, INT_MIN, node->frame.end_offset); - } - - if (node->frame.end_offset < 0) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset must be greater than zero, {} given", - node->frame.end_offset); - } } else { diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index ffa8250a3f3..1fa4d396113 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -275,7 +275,8 @@ Token Lexer::nextTokenImpl() else ++pos; } - return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end); + pos = end; + return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos); } } return Token(TokenType::Slash, token_begin, pos); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1fc51bd4112..4a5282c1e6b 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,8 @@ public: virtual ~IWindowFunction() = default; // Must insert the result for current_row. 
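// The signature change below passes the transform and the function's own index
// instead of a bare result column, so that implementations can reach both the
// input columns and the output column of the block holding current_row.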
- virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0; + virtual void windowInsertResultInto(const WindowTransform * transform, + size_t function_index) = 0; }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -37,7 +39,7 @@ template static int compareValuesWithOffset(const IColumn * _compared_column, size_t compared_row, const IColumn * _reference_column, size_t reference_row, - uint64_t _offset, + const Field & _offset, bool offset_is_preceding) { // Casting the columns to the known type here makes it faster, probably @@ -46,7 +48,8 @@ static int compareValuesWithOffset(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); - const auto offset = static_cast(_offset); + const auto offset = _offset.get(); + assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); @@ -101,6 +104,53 @@ static int compareValuesWithOffset(const IColumn * _compared_column, } } +// A specialization of compareValuesWithOffset for floats. +template +static int compareValuesWithOffsetFloat(const IColumn * _compared_column, + size_t compared_row, const IColumn * _reference_column, + size_t reference_row, + const Field & _offset, + bool offset_is_preceding) +{ + // Casting the columns to the known type here makes it faster, probably + // because the getData call can be devirtualized. + const auto * compared_column = assert_cast( + _compared_column); + const auto * reference_column = assert_cast( + _reference_column); + const auto offset = _offset.get(); + assert(offset >= 0); + + const auto compared_value_data = compared_column->getDataAt(compared_row); + assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); + auto compared_value = unalignedLoad( + compared_value_data.data); + + const auto reference_value_data = reference_column->getDataAt(reference_row); + assert(reference_value_data.size == sizeof(typename ColumnType::ValueType)); + auto reference_value = unalignedLoad( + reference_value_data.data); + + // Floats overflow to Inf and the comparison will work normally, so we don't + // have to do anything. + if (offset_is_preceding) + { + reference_value -= offset; + } + else + { + reference_value += offset; + } + + const auto result = compared_value < reference_value ? -1 + : compared_value == reference_value ? 0 : 1; + +// fmt::print(stderr, "compared {}, offset {}, reference {}, result {}\n", +// compared_value, offset, reference_value, result); + + return result; +} + // Helper macros to dispatch on type of the ORDER BY column #define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \ else if (typeid_cast(column)) \ @@ -114,14 +164,20 @@ if (false) /* NOLINT */ \ { \ /* Do nothing, a starter condition. 
*/ \ } \ -APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ -APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ -APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ -APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +\ +APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector) \ +\ +APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector) \ +APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector) \ +\ else \ { \ throw Exception(ErrorCodes::NOT_IMPLEMENTED, \ @@ -193,9 +249,28 @@ WindowTransform::WindowTransform(const Block & input_header_, == WindowFrame::BoundaryType::Offset)) { assert(order_by_indices.size() == 1); - const IColumn * column = input_header.getByPosition( - order_by_indices[0]).column.get(); + const auto & entry = input_header.getByPosition(order_by_indices[0]); + const IColumn * column = entry.column.get(); APPLY_FOR_TYPES(compareValuesWithOffset) + + // Check that the offset type matches the window type. + // Convert the offsets to the ORDER BY column type. We can't just check + // that it matches, because e.g. the int literals are always (U)Int64, + // but the column might be Int8 and so on. + if (window_description.frame.begin_type + == WindowFrame::BoundaryType::Offset) + { + window_description.frame.begin_offset = convertFieldToTypeOrThrow( + window_description.frame.begin_offset, + *entry.type); + } + if (window_description.frame.end_type + == WindowFrame::BoundaryType::Offset) + { + window_description.frame.end_offset = convertFieldToTypeOrThrow( + window_description.frame.end_offset, + *entry.type); + } } } @@ -391,7 +466,7 @@ void WindowTransform::advanceFrameStartRowsOffset() { // Just recalculate it each time by walking blocks. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.begin_offset + window_description.frame.begin_offset.get() * (window_description.frame.begin_preceding ? -1 : 1)); frame_start = moved_row; @@ -638,7 +713,7 @@ void WindowTransform::advanceFrameEndRowsOffset() // Walk the specified offset from the current row. The "+1" is needed // because the frame_end is a past-the-end pointer. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.end_offset + window_description.frame.end_offset.get() * (window_description.frame.end_preceding ? -1 : 1) + 1); @@ -852,14 +927,14 @@ void WindowTransform::writeOutCurrentRow() for (size_t wi = 0; wi < workspaces.size(); ++wi) { auto & ws = workspaces[wi]; - IColumn * result_column = block.output_columns[wi].get(); if (ws.window_function_impl) { - ws.window_function_impl->windowInsertResultInto(*result_column, this); + ws.window_function_impl->windowInsertResultInto(this, wi); } else { + IColumn * result_column = block.output_columns[wi].get(); const auto * a = ws.aggregate_function.get(); auto * buf = ws.aggregate_function_state.data(); // FIXME does it also allocate the result on the arena? 
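The float specialization above leans on IEEE 754 semantics: shifting the reference value by the offset may overflow to +/-Inf, and the subsequent three-way comparison still orders values correctly. A self-contained sketch of that comparison with plain doubles (the real code reads the values out of ClickHouse columns):

#include <cassert>

/// Three-way comparison of `compared` against `reference` shifted by `offset`,
/// mirroring compareValuesWithOffsetFloat: overflow to +/-Inf is harmless here,
/// because Inf still compares correctly against any finite float.
static int compareWithOffsetSketch(double compared, double reference,
                                   double offset, bool offset_is_preceding)
{
    assert(offset >= 0);
    reference += offset_is_preceding ? -offset : offset;
    if (compared < reference)
        return -1;
    return compared == reference ? 0 : 1;
}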
@@ -1280,8 +1355,11 @@ struct WindowFunctionRank final : public WindowFunction DataTypePtr getReturnType() const override { return std::make_shared(); } - void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override + void windowInsertResultInto(const WindowTransform * transform, + size_t function_index) override { + IColumn & to = *transform->blockAt(transform->current_row) + .output_columns[function_index]; assert_cast(to).getData().push_back( transform->peer_group_start_row_number); } @@ -1297,8 +1375,11 @@ struct WindowFunctionDenseRank final : public WindowFunction DataTypePtr getReturnType() const override { return std::make_shared(); } - void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override + void windowInsertResultInto(const WindowTransform * transform, + size_t function_index) override { + IColumn & to = *transform->blockAt(transform->current_row) + .output_columns[function_index]; assert_cast(to).getData().push_back( transform->peer_group_number); } @@ -1314,13 +1395,123 @@ struct WindowFunctionRowNumber final : public WindowFunction DataTypePtr getReturnType() const override { return std::make_shared(); } - void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override + void windowInsertResultInto(const WindowTransform * transform, + size_t function_index) override { + IColumn & to = *transform->blockAt(transform->current_row) + .output_columns[function_index]; assert_cast(to).getData().push_back( transform->current_row_number); } }; +// ClickHouse-specific variant of lag/lead that respects the window frame. +template +struct WindowFunctionLagLeadInFrame final : public WindowFunction +{ + WindowFunctionLagLeadInFrame(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : WindowFunction(name_, argument_types_, parameters_) + { + if (!parameters.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} cannot be parameterized", name_); + } + + if (argument_types.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} takes at least one argument", name_); + } + + if (argument_types.size() == 1) + { + return; + } + + if (!isInt64FieldType(argument_types[1]->getDefault().getType())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Offset must be an integer, '{}' given", + argument_types[1]->getName()); + } + + if (argument_types.size() == 2) + { + return; + } + + if (!getLeastSupertype({argument_types[0], argument_types[2]})) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "The default value type '{}' is not convertible to the argument type '{}'", + argument_types[2]->getName(), + argument_types[0]->getName()); + } + + if (argument_types.size() > 3) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function '{}' accepts at most 3 arguments, {} given", + name, argument_types.size()); + } + } + + DataTypePtr getReturnType() const override + { return argument_types[0]; } + + void windowInsertResultInto(const WindowTransform * transform, + size_t function_index) override + { + const auto & current_block = transform->blockAt(transform->current_row); + IColumn & to = *current_block.output_columns[function_index]; + const auto & workspace = transform->workspaces[function_index]; + + int offset = 1; + if (argument_types.size() > 1) + { + offset = (*current_block.input_columns[ + workspace.argument_column_indices[1]])[ + transform->current_row.row].get(); + if (offset < 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "The offset 
for function {} must be nonnegative, {} given", + getName(), offset); + } + } + + const auto [target_row, offset_left] = transform->moveRowNumber( + transform->current_row, offset * (is_lead ? 1 : -1)); + + if (offset_left != 0 + || target_row < transform->frame_start + || transform->frame_end <= target_row) + { + // Offset is outside the frame. + if (argument_types.size() > 2) + { + // Column with default values is specified. + to.insertFrom(*current_block.input_columns[ + workspace.argument_column_indices[2]], + transform->current_row.row); + } + else + { + to.insertDefault(); + } + } + else + { + // Offset is inside the frame. + to.insertFrom(*transform->blockAt(target_row).input_columns[ + workspace.argument_column_indices[0]], + target_row.row); + } + } +}; + void registerWindowFunctions(AggregateFunctionFactory & factory) { // Why didn't I implement lag/lead yet? Because they are a mess. I imagine @@ -1332,9 +1523,10 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) // the whole partition like Postgres does, because using a linear amount // of additional memory is not an option when we have a lot of data. We must // be able to process at least the lag/lead in streaming fashion. - // Our best bet is probably rewriting, say `lag(value, offset)` to - // `any(value) over (rows between offset preceding and offset preceding)`, - // at the query planning stage. + // A partial solution for constant offsets is rewriting, say `lag(value, offset) + // to `any(value) over (rows between offset preceding and offset preceding)`. + // We also implement non-standard functions `lag/leadInFrame`, that are + // analogous to `lag/lead`, but respect the frame. // Functions like cume_dist() do require materializing the entire // partition, but it's probably also simpler to implement them by rewriting // to a (rows between unbounded preceding and unbounded following) frame, @@ -1360,6 +1552,20 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) return std::make_shared(name, argument_types, parameters); }); + + factory.registerFunction("lagInFrame", [](const std::string & name, + const DataTypes & argument_types, const Array & parameters) + { + return std::make_shared>( + name, argument_types, parameters); + }); + + factory.registerFunction("leadInFrame", [](const std::string & name, + const DataTypes & argument_types, const Array & parameters) + { + return std::make_shared>( + name, argument_types, parameters); + }); } } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 5001b984e9a..882bf429c0a 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -110,7 +110,9 @@ public: Status prepare() override; void work() override; -private: + /* + * Implementation details. + */ void advancePartitionEnd(); bool arePeers(const RowNumber & x, const RowNumber & y) const; @@ -321,10 +323,7 @@ public: int (* compare_values_with_offset) ( const IColumn * compared_column, size_t compared_row, const IColumn * reference_column, size_t reference_row, - // We can make it a Field later if we need the Decimals. Now we only - // have ints and datetime, and the underlying Field type for them is - // uint64_t anyway. 
- uint64_t offset, + const Field & offset, bool offset_is_preceding); }; diff --git a/src/Server/HTTP/HTMLForm.cpp b/src/Server/HTTP/HTMLForm.cpp index ca407858c33..a00950c8e27 100644 --- a/src/Server/HTTP/HTMLForm.cpp +++ b/src/Server/HTTP/HTMLForm.cpp @@ -369,6 +369,11 @@ bool HTMLForm::MultipartReadBuffer::nextImpl() else boundary_hit = startsWith(line, boundary); + if (!line.empty()) + /// If we don't make sure that memory is contiguous then situation may happen, when part of the line is inside internal memory + /// and other part is inside sub-buffer, thus we'll be unable to setup our working buffer properly. + in.makeContinuousMemoryFromCheckpointToPos(); + in.rollbackToCheckpoint(true); /// Rolling back to checkpoint may change underlying buffers. diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 740072e8e9f..426e4ca2138 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -107,6 +107,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe } catch (...) { + tryLogCurrentException(log); out.finalize(); } }; @@ -116,6 +117,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe if (auto [message, success] = checkAuthentication(request); success) { processQuery(request, response, used_output); + used_output.out->finalize(); LOG_DEBUG(log, "Done processing query"); } else diff --git a/src/Server/NuKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp index b283356d27d..b676331f6c0 100644 --- a/src/Server/NuKeeperTCPHandler.cpp +++ b/src/Server/NuKeeperTCPHandler.cpp @@ -240,16 +240,10 @@ Poco::Timespan NuKeeperTCPHandler::receiveHandshake() throw Exception("Unexpected protocol version: " + toString(protocol_version), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); Coordination::read(last_zxid_seen, *in); - - if (last_zxid_seen != 0) - throw Exception("Non zero last_zxid_seen is not supported", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); - Coordination::read(timeout_ms, *in); + + /// TODO Stop ignoring this value Coordination::read(previous_session_id, *in); - - if (previous_session_id != 0) - throw Exception("Non zero previous session id is not supported", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); - Coordination::read(passwd, *in); int8_t readonly; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index ee2f7c96b5a..c3dd8346c8e 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -8,10 +8,10 @@ #include #include #include +#include #include #include #include -#include #include "IServer.h" diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index f3b0e3022f1..affb76314b1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -26,7 +26,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl HDFSBuilderWrapper builder; HDFSFSPtr fs; - explicit ReadBufferFromHDFSImpl(const std::string & hdfs_name_, + ReadBufferFromHDFSImpl(const std::string & hdfs_name_, const Poco::Util::AbstractConfiguration & config_) : hdfs_uri(hdfs_name_), builder(createHDFSBuilder(hdfs_uri, config_)) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 2cbc36e02fe..39f6d1f632e 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,8 +1,5 @@ #include -#include -#include - #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp 
b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 7f2a9cdb1f6..3d15681a27e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -333,40 +333,49 @@ IMergeTreeDataPart::State IMergeTreeDataPart::getState() const } -DayNum IMergeTreeDataPart::getMinDate() const +std::pair IMergeTreeDataPart::getMinMaxDate() const { if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized) - return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].left.get()); + { + const auto & hyperrectangle = minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos]; + return {DayNum(hyperrectangle.left.get()), DayNum(hyperrectangle.right.get())}; + } else - return DayNum(); + return {}; } - -DayNum IMergeTreeDataPart::getMaxDate() const -{ - if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized) - return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].right.get()); - else - return DayNum(); -} - -time_t IMergeTreeDataPart::getMinTime() const +std::pair IMergeTreeDataPart::getMinMaxTime() const { if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized) - return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].left.get(); + { + const auto & hyperrectangle = minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos]; + + /// The case of DateTime + if (hyperrectangle.left.getType() == Field::Types::UInt64) + { + assert(hyperrectangle.right.getType() == Field::Types::UInt64); + return {hyperrectangle.left.get(), hyperrectangle.right.get()}; + } + /// The case of DateTime64 + else if (hyperrectangle.left.getType() == Field::Types::Decimal64) + { + assert(hyperrectangle.right.getType() == Field::Types::Decimal64); + + auto left = hyperrectangle.left.get>(); + auto right = hyperrectangle.right.get>(); + + assert(left.getScale() == right.getScale()); + + return { left.getValue() / left.getScaleMultiplier(), right.getValue() / right.getScaleMultiplier() }; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part minmax index by time is neither DateTime nor DateTime64"); + } else - return 0; + return {}; } -time_t IMergeTreeDataPart::getMaxTime() const -{ - if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized) - return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].right.get(); - else - return 0; -} - void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) { columns = new_columns; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 83f8c672001..92b05e5cbd2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -155,13 +155,11 @@ public: bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); } - /// If the partition key includes date column (a common case), these functions will return min and max values for this column. - DayNum getMinDate() const; - DayNum getMaxDate() const; + /// If the partition key includes date column (a common case), this function will return min and max values for that column. + std::pair getMinMaxDate() const; - /// otherwise, if the partition key includes dateTime column (also a common case), these functions will return min and max values for this column.
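The `DateTime64` branch of `getMinMaxTime()` above divides the stored `Decimal64` value by its scale multiplier to get whole seconds. A small worked example of that arithmetic, in plain Python:

```python
# Worked example of the DateTime64 branch of getMinMaxTime(). The minmax index
# stores a Decimal64 whose integer value is the timestamp scaled by 10^scale;
# dividing by the scale multiplier truncates it back to whole seconds, which is
# what the returned time_t pair holds.
scale = 3                          # DateTime64(3): millisecond precision
scale_multiplier = 10 ** scale     # analogous to left.getScaleMultiplier()
raw_value = 1609459200123          # 2021-01-01 00:00:00.123 as a scaled Int64

seconds = raw_value // scale_multiplier
assert seconds == 1609459200       # 2021-01-01 00:00:00; sub-second part dropped
```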
- time_t getMinTime() const; - time_t getMaxTime() const; + /// otherwise, if the partition key includes dateTime column (also a common case), this function will return min and max values for that column. + std::pair getMinMaxTime() const; bool isEmpty() const { return rows_count == 0; } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 8f5dec8077d..6833d2e2fd4 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -444,7 +444,8 @@ bool KeyCondition::addCondition(const String & column, const Range & range) */ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type) { - String column_name = expr->getColumnNameWithoutAlias(); + // Constant expr should use alias names if any + String column_name = expr->getColumnName(); if (const auto * lit = expr->as()) { @@ -607,7 +608,8 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (strict) return false; - String expr_name = node->getColumnNameWithoutAlias(); + // Constant expr should use alias names if any + String expr_name = node->getColumnName(); const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) return false; @@ -675,7 +677,8 @@ bool KeyCondition::canConstantBeWrappedByFunctions( if (strict) return false; - String expr_name = ast->getColumnNameWithoutAlias(); + // Constant expr should use alias names if any + String expr_name = ast->getColumnName(); const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) return false; @@ -1011,6 +1014,8 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( * Therefore, use the full name of the expression for search. */ const auto & sample_block = key_expr->getSampleBlock(); + + // Key columns should use canonical names for index analysis String name = node->getColumnNameWithoutAlias(); auto it = key_columns.find(name); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 654262542b4..09b7dcd3a78 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -469,15 +469,19 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa DataTypes minmax_idx_columns_types = getMinMaxColumnsTypes(new_partition_key); /// Try to find the date column in columns used by the partition key (a common case). - bool encountered_date_column = false; + /// If there is no Date column, a DateTime or DateTime64 column would also suffice.
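To make the comment above concrete: after this change a `DateTime` or `DateTime64` column in the partition key also feeds the part-level min/max index, not only a `Date` column. A hedged sketch, assuming a local server and the `clickhouse_driver` client used by this PR's tests (the table name is illustrative):

```python
# Illustrative only: a DateTime64 column in the partition key now populates
# minmax_idx_time_column_pos, so parts get meaningful min/max time bounds.
from clickhouse_driver import Client

client = Client('localhost')
client.execute("""
    CREATE TABLE IF NOT EXISTS t_dt64_parts
    (
        ts DateTime64(3),
        x  UInt64
    )
    ENGINE = MergeTree
    PARTITION BY toYYYYMM(ts)  -- the minmax index is built over the raw ts column
    ORDER BY x
""")
client.execute("INSERT INTO t_dt64_parts VALUES (now64(3), 1)")

# The bounds surface through system.parts (see the StorageSystemParts changes
# further down in this diff).
rows = client.execute(
    "SELECT min_time, max_time FROM system.parts "
    "WHERE table = 't_dt64_parts' AND active")
```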
+ + bool has_date_column = false; + bool has_datetime_column = false; + for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i) { - if (typeid_cast(minmax_idx_columns_types[i].get())) + if (isDate(minmax_idx_columns_types[i])) { - if (!encountered_date_column) + if (!has_date_column) { minmax_idx_date_column_pos = i; - encountered_date_column = true; + has_date_column = true; } else { @@ -486,16 +490,18 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa } } } - if (!encountered_date_column) + if (!has_date_column) { for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i) { - if (typeid_cast(minmax_idx_columns_types[i].get())) + if (isDateTime(minmax_idx_columns_types[i]) + || isDateTime64(minmax_idx_columns_types[i]) + ) { - if (!encountered_date_column) + if (!has_datetime_column) { minmax_idx_time_column_pos = i; - encountered_date_column = true; + has_datetime_column = true; } else { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 747819c77eb..96a3dba12f7 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -551,11 +551,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( .checksum_on_read = settings.checksum_on_read, }; - /// PREWHERE - String prewhere_column; - if (select.prewhere()) - prewhere_column = select.prewhere()->getColumnName(); - struct DataSkippingIndexAndCondition { MergeTreeIndexPtr index; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 34cac56d74c..692d2ac4b94 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -37,6 +37,8 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( : table_columns{ext::map( metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} , queried_columns{queried_columns_} + , sorting_key_names{NameSet( + metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())} , block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)} , log{log_} , column_sizes{std::move(column_sizes_)} @@ -114,12 +116,12 @@ static bool isConditionGood(const ASTPtr & condition) } -void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node) const +void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node, bool is_final) const { if (const auto * func_and = node->as(); func_and && func_and->name == "and") { for (const auto & elem : func_and->arguments->children) - analyzeImpl(res, elem); + analyzeImpl(res, elem, is_final); } else { @@ -133,7 +135,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node) cond.viable = /// Condition depend on some column. Constant expressions are not moved. !cond.identifiers.empty() - && !cannotBeMoved(node) + && !cannotBeMoved(node, is_final) /// Do not take into consideration the conditions consisting only of the first primary key column && !hasPrimaryKeyAtoms(node) /// Only table columns are considered. Not array joined columns. NOTE We're assuming that aliases was expanded. @@ -149,10 +151,10 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node) } /// Transform conjunctions chain in WHERE expression to Conditions list. 
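The `is_final` flag threaded through the optimizer above implements one rule: under `SELECT ... FINAL`, only conditions on sorting-key columns may be moved to PREWHERE, because a non-key column can differ between the row versions that FINAL collapses, and filtering before the collapse could change the result. A sketch of the situation, with an illustrative table name and the same `clickhouse_driver` client as elsewhere:

```python
# Illustrative only: shows the kind of query the is_final restriction targets.
from clickhouse_driver import Client

client = Client('localhost')
client.execute("""
    CREATE TABLE IF NOT EXISTS t_final_opt
    (
        k UInt64,  -- sorting key: safe to move into PREWHERE under FINAL
        v UInt64   -- non-key column: may differ between row versions
    )
    ENGINE = ReplacingMergeTree ORDER BY k
""")

# 'k = 1' may move to PREWHERE even with FINAL; 'v = 2' must stay in WHERE,
# since a stale row version could match v = 2 while the collapsed row doesn't.
client.execute("SELECT * FROM t_final_opt FINAL WHERE k = 1 AND v = 2")
```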
-MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const ASTPtr & expression) const +MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const ASTPtr & expression, bool is_final) const { Conditions res; - analyzeImpl(res, expression); + analyzeImpl(res, expression, is_final); return res; } @@ -183,7 +185,7 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const if (!select.where() || select.prewhere()) return; - Conditions where_conditions = analyze(select.where()); + Conditions where_conditions = analyze(select.where(), select.final()); Conditions prewhere_conditions; UInt64 total_size_of_moved_conditions = 0; @@ -300,6 +302,12 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const ASTPtr & ast) const } +bool MergeTreeWhereOptimizer::isSortingKey(const String & column_name) const +{ + return sorting_key_names.count(column_name); +} + + bool MergeTreeWhereOptimizer::isConstant(const ASTPtr & expr) const { const auto column_name = expr->getColumnName(); @@ -319,7 +327,7 @@ bool MergeTreeWhereOptimizer::isSubsetOfTableColumns(const NameSet & identifiers } -bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const +bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) const { if (const auto * function_ptr = ptr->as()) { @@ -336,12 +344,13 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const { /// disallow moving result of ARRAY JOIN to PREWHERE if (array_joined_names.count(*opt_name) || - array_joined_names.count(Nested::extractTableName(*opt_name))) + array_joined_names.count(Nested::extractTableName(*opt_name)) || + (is_final && !isSortingKey(*opt_name))) return true; } for (const auto & child : ptr->children) - if (cannotBeMoved(child)) + if (cannotBeMoved(child, is_final)) return true; return false; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index cad77fb9eed..8fd973e9ba3 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -67,10 +67,10 @@ private: using Conditions = std::list; - void analyzeImpl(Conditions & res, const ASTPtr & node) const; + void analyzeImpl(Conditions & res, const ASTPtr & node, bool is_final) const; /// Transform conjunctions chain in WHERE expression to Conditions list. - Conditions analyze(const ASTPtr & expression) const; + Conditions analyze(const ASTPtr & expression, bool is_final) const; /// Transform Conditions list to WHERE or PREWHERE expression. static ASTPtr reconstruct(const Conditions & conditions); @@ -85,6 +85,8 @@ private: bool isPrimaryKeyAtom(const ASTPtr & ast) const; + bool isSortingKey(const String & column_name) const; + bool isConstant(const ASTPtr & expr) const; bool isSubsetOfTableColumns(const NameSet & identifiers) const; @@ -95,7 +97,7 @@ private: * * Also, disallow moving expressions with GLOBAL [NOT] IN. 
*/ - bool cannotBeMoved(const ASTPtr & ptr) const; + bool cannotBeMoved(const ASTPtr & ptr, bool is_final) const; void determineArrayJoinedNames(ASTSelectQuery & select); @@ -104,6 +106,7 @@ private: String first_primary_key_column; const StringSet table_columns; const Names queried_columns; + const NameSet sorting_key_names; const Block block_with_constants; Poco::Logger * log; std::unordered_map column_sizes; diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 85f3bea9e0c..7aaec9b7e76 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -179,6 +179,7 @@ StoragePtr StorageFactory::get( .attach = query.attach, .has_force_restore_data_flag = has_force_restore_data_flag }; + assert(&arguments.context == &arguments.context.getGlobalContext()); auto res = storages.at(name).creator_fn(arguments); if (!empty_engine_args.empty()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9b93d7183fd..0849f65477d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4932,7 +4932,7 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && is_dropped; + bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || is_dropped); bool stop_waiting_non_active = !wait_for_non_active && !getZooKeeper()->exists(table_zookeeper_path + "/replicas/" + replica + "/is_active"); return stop_waiting_itself || stop_waiting_non_active; }; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ca984f9ece9..2d3879340dc 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -33,7 +33,7 @@ namespace ErrorCodes IStorageURLBase::IStorageURLBase( const Poco::URI & uri_, - const Context & context_, + const Context & /*context_*/, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, @@ -42,13 +42,10 @@ IStorageURLBase::IStorageURLBase( const String & compression_method_) : IStorage(table_id_) , uri(uri_) - , context_global(context_) , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) { - context_global.getRemoteHostFilter().checkURL(uri); - StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setConstraints(constraints_); @@ -237,14 +234,28 @@ Pipe IStorageURLBase::read( chooseCompressionMethod(request_uri.getPath(), compression_method))); } -BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) +BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { return std::make_shared(uri, format_name, - format_settings, metadata_snapshot->getSampleBlock(), context_global, - ConnectionTimeouts::getHTTPTimeouts(context_global), + format_settings, metadata_snapshot->getSampleBlock(), context, + ConnectionTimeouts::getHTTPTimeouts(context), chooseCompressionMethod(uri.toString(), compression_method)); } +StorageURL::StorageURL(const Poco::URI & uri_, + const StorageID & table_id_, + const String & format_name_, + const std::optional & format_settings_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + Context & context_, + const String & compression_method_) + : 
IStorageURLBase(uri_, context_, table_id_, format_name_, + format_settings_, columns_, constraints_, compression_method_) +{ + context_.getRemoteHostFilter().checkURL(uri); +} + void registerStorageURL(StorageFactory & factory) { factory.registerStorage("URL", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 21b2e3e27a1..2b2384b1043 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -45,7 +45,6 @@ protected: const String & compression_method_); Poco::URI uri; - const Context & context_global; String compression_method; String format_name; // For URL engine, we use format settings from server context + `SETTINGS` @@ -114,11 +113,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, Context & context_, - const String & compression_method_) - : IStorageURLBase(uri_, context_, table_id_, format_name_, - format_settings_, columns_, constraints_, compression_method_) - { - } + const String & compression_method_); String getName() const override { diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 5243cb11aa3..09d0aaddb3d 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,7 @@ NamesAndTypesList StorageSystemErrors::getNamesAndTypes() { "value", std::make_shared() }, { "last_error_time", std::make_shared() }, { "last_error_message", std::make_shared() }, - { "last_error_stacktrace", std::make_shared() }, + { "last_error_trace", std::make_shared(std::make_shared()) }, { "remote", std::make_shared() }, }; } @@ -34,7 +35,14 @@ void StorageSystemErrors::fillData(MutableColumns & res_columns, const Context & res_columns[col_num++]->insert(error.count); res_columns[col_num++]->insert(error.error_time_ms / 1000); res_columns[col_num++]->insert(error.message); - res_columns[col_num++]->insert(error.stacktrace); + { + Array trace_array; + trace_array.reserve(error.trace.size()); + for (size_t i = 0; i < error.trace.size(); ++i) + trace_array.emplace_back(reinterpret_cast(error.trace[i])); + + res_columns[col_num++]->insert(trace_array); + } res_columns[col_num++]->insert(remote); } }; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index eece092206d..6a643dbe1b9 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -137,14 +137,17 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(static_cast(part.use_count() - 1)); + auto min_max_date = part->getMinMaxDate(); + auto min_max_time = part->getMinMaxTime(); + if (columns_mask[src_index++]) - columns[res_index++]->insert(part->getMinDate()); + columns[res_index++]->insert(min_max_date.first); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->getMaxDate()); + columns[res_index++]->insert(min_max_date.second); if (columns_mask[src_index++]) - columns[res_index++]->insert(static_cast(part->getMinTime())); + columns[res_index++]->insert(static_cast(min_max_time.first)); if (columns_mask[src_index++]) - columns[res_index++]->insert(static_cast(part->getMaxTime())); + columns[res_index++]->insert(static_cast(min_max_time.second)); if (columns_mask[src_index++]) columns[res_index++]->insert(part->info.partition_id); if (columns_mask[src_index++]) diff --git 
a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 8754e424281..703de70d17f 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -32,6 +32,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_ {"refcount", std::make_shared()}, {"min_date", std::make_shared()}, {"max_date", std::make_shared()}, + {"min_time", std::make_shared()}, + {"max_time", std::make_shared()}, {"partition_id", std::make_shared()}, {"min_block_number", std::make_shared()}, {"max_block_number", std::make_shared()}, @@ -95,8 +97,10 @@ void StorageSystemPartsColumns::processNextStorage( /// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts. auto use_count = part.use_count() - 1; - auto min_date = part->getMinDate(); - auto max_date = part->getMaxDate(); + + auto min_max_date = part->getMinMaxDate(); + auto min_max_time = part->getMinMaxTime(); + auto index_size_in_bytes = part->getIndexSizeInBytes(); auto index_size_in_allocated_bytes = part->getIndexSizeInAllocatedBytes(); @@ -141,9 +145,14 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insert(UInt64(use_count)); if (columns_mask[src_index++]) - columns[res_index++]->insert(min_date); + columns[res_index++]->insert(min_max_date.first); if (columns_mask[src_index++]) - columns[res_index++]->insert(max_date); + columns[res_index++]->insert(min_max_date.second); + if (columns_mask[src_index++]) + columns[res_index++]->insert(static_cast(min_max_time.first)); + if (columns_mask[src_index++]) + columns[res_index++]->insert(static_cast(min_max_time.second)); + if (columns_mask[src_index++]) columns[res_index++]->insert(part->info.partition_id); if (columns_mask[src_index++]) diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 804a5b232ec..b637838c6da 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -20,12 +20,20 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context & ProfileEvents::increment(ProfileEvents::TableFunctionExecute); context.checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName())); - if (cached_columns.empty() || (hasStaticStructure() && cached_columns == getActualTableStructure(context))) + if (cached_columns.empty()) return executeImpl(ast_function, context, table_name, std::move(cached_columns)); - auto get_storage = [=, tf = shared_from_this()]() -> StoragePtr + /// We have table structure, so it's CREATE AS table_function(). + /// We should use global context here because there will be no query context on server startup + /// and because storage lifetime is bigger than query context lifetime. 
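The comment above refers to tables created with `CREATE TABLE ... AS table_function(...)`: the resulting storage outlives the query that created it and must be re-creatable at server startup, when only the global context exists. A hypothetical sketch of that usage pattern (table name illustrative; `numbers(10)` is just one example of the form):

```python
# Illustrative only: the table below is backed by a table function, so its
# underlying storage is constructed lazily from the *global* context rather
# than the query context of the CREATE statement.
from clickhouse_driver import Client

client = Client('localhost')
client.execute("CREATE TABLE IF NOT EXISTS t_from_tf AS numbers(10)")

# After a server restart there is no query context for t_from_tf; re-attaching
# it relies on the global context captured in ITableFunction::execute().
print(client.execute("SELECT count() FROM t_from_tf"))
```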
+ const Context & global_context = context.getGlobalContext(); + if (hasStaticStructure() && cached_columns == getActualTableStructure(context)) + return executeImpl(ast_function, global_context, table_name, std::move(cached_columns)); + + auto this_table_function = shared_from_this(); + auto get_storage = [=, &global_context]() -> StoragePtr { - return tf->executeImpl(ast_function, context, table_name, cached_columns); + return this_table_function->executeImpl(ast_function, global_context, table_name, cached_columns); }; /// It will request actual table structure and create underlying storage lazily diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index e04a86b5abf..21c78d199db 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -55,15 +55,21 @@ void ITableFunctionXDBC::parseArguments(const ASTPtr & ast_function, const Conte connection_string = args[0]->as().value.safeGet(); remote_table_name = args[1]->as().value.safeGet(); } +} - /// Have to const_cast, because bridges store their commands inside context - helper = createBridgeHelper(const_cast(context), context.getSettingsRef().http_receive_timeout.value, connection_string); - helper->startBridgeSync(); +void ITableFunctionXDBC::startBridgeIfNot(const Context & context) const +{ + if (!helper) + { + /// Have to const_cast, because bridges store their commands inside context + helper = createBridgeHelper(const_cast(context), context.getSettingsRef().http_receive_timeout.value, connection_string); + helper->startBridgeSync(); + } } ColumnsDescription ITableFunctionXDBC::getActualTableStructure(const Context & context) const { - assert(helper); + startBridgeIfNot(context); /* Infer external table structure */ Poco::URI columns_info_uri = helper->getColumnsInfoURI(); @@ -87,7 +93,7 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(const Context & c StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, const Context & context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { - assert(helper); + startBridgeIfNot(context); auto columns = getActualTableStructure(context); auto result = std::make_shared(StorageID(getDatabaseName(), table_name), schema_name, remote_table_name, columns, context, helper); result->startup(); diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index fb0a0fd1185..f3ff64c2f2d 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -29,10 +29,12 @@ private: void parseArguments(const ASTPtr & ast_function, const Context & context) override; + void startBridgeIfNot(const Context & context) const; + String connection_string; String schema_name; String remote_table_name; - BridgeHelperPtr helper; + mutable BridgeHelperPtr helper; }; class TableFunctionJDBC : public ITableFunctionXDBC diff --git a/tests/integration/helpers/dictionary.py b/tests/integration/helpers/dictionary.py index b3f7a729777..41d87180c8a 100644 --- a/tests/integration/helpers/dictionary.py +++ b/tests/integration/helpers/dictionary.py @@ -7,12 +7,12 @@ class Layout(object): 'flat': '', 'hashed': '', 'cache': '128', - 'ssd_cache': '/etc/clickhouse/dictionaries/all128', + 'ssd_cache': '/etc/clickhouse/dictionaries/all', 'complex_key_hashed': '', 'complex_key_hashed_one_key': '', 'complex_key_hashed_two_keys': '', 'complex_key_cache': '128', - 'complex_key_ssd_cache': 
'/etc/clickhouse/dictionaries/all128', + 'complex_key_ssd_cache': '/etc/clickhouse/dictionaries/all', 'range_hashed': '', 'direct': '', 'complex_key_direct': '' diff --git a/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/ssd_complex_key_cache_string.xml b/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/ssd_complex_key_cache_string.xml index 85f811d2d85..c8fdbcbe0ef 100644 --- a/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/ssd_complex_key_cache_string.xml +++ b/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/ssd_complex_key_cache_string.xml @@ -42,7 +42,6 @@ 131072 1048576 /etc/clickhouse/dictionaries/radars - 1048576 1 diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index 5ceb6496b90..10d9f4213e1 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -80,7 +80,7 @@ def test_load_dictionaries(started_cluster): create_dict(table_name) dict_name = 'dict0' - node1.query("SYSTEM RELOAD DICTIONARIES") + node1.query("SYSTEM RELOAD DICTIONARY {}".format(dict_name)) assert node1.query("SELECT count() FROM `test`.`dict_table_{}`".format(table_name)).rstrip() == '10000' assert node1.query("SELECT dictGetUInt32('{}', 'id', toUInt64(0))".format(dict_name)) == '0\n' assert node1.query("SELECT dictGetUInt32('{}', 'value', toUInt64(9999))".format(dict_name)) == '9999\n' diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py b/tests/integration/test_dictionaries_update_and_reload/test.py index 5c8abcda38e..533a29dc245 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -141,7 +141,8 @@ def test_reload_after_loading(started_cluster): time.sleep(1) # see the comment above replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '81', '82') replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '101', '102') - query("SYSTEM RELOAD DICTIONARIES") + query("SYSTEM RELOAD DICTIONARY 'file'") + query("SYSTEM RELOAD DICTIONARY 'executable'") assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "82\n" assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "102\n" diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index b1daf2271d0..1a0e5a3dd91 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -97,12 +97,14 @@ def test_insecure(): n1.query('SELECT * FROM dist_insecure') def test_insecure_insert_async(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_insecure SELECT * FROM numbers(2)') n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER insecure dist_insecure') assert int(n1.query('SELECT count() FROM dist_insecure')) == 2 n1.query('TRUNCATE TABLE data ON CLUSTER insecure') def test_insecure_insert_sync(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_insecure SELECT * FROM numbers(2)', settings={'insert_distributed_sync': 1}) assert int(n1.query('SELECT count() FROM dist_insecure')) == 2 n1.query('TRUNCATE TABLE data ON CLUSTER secure') @@ -111,12 +113,14 @@ def test_secure(): n1.query('SELECT * FROM dist_secure') def 
test_secure_insert_async(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_secure SELECT * FROM numbers(2)') n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure') assert int(n1.query('SELECT count() FROM dist_secure')) == 2 n1.query('TRUNCATE TABLE data ON CLUSTER secure') def test_secure_insert_sync(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_secure SELECT * FROM numbers(2)', settings={'insert_distributed_sync': 1}) assert int(n1.query('SELECT count() FROM dist_secure')) == 2 n1.query('TRUNCATE TABLE data ON CLUSTER secure') @@ -126,6 +130,7 @@ def test_secure_insert_sync(): # Buffer() flush happens with global context, that does not have user # And so Context::user/ClientInfo::current_user/ClientInfo::initial_user will be empty def test_secure_insert_buffer_async(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_secure_buffer SELECT * FROM numbers(2)') n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure') # no Buffer flush happened @@ -141,6 +146,7 @@ def test_secure_disagree(): n1.query('SELECT * FROM dist_secure_disagree') def test_secure_disagree_insert(): + n1.query("TRUNCATE TABLE data") n1.query('INSERT INTO dist_secure_disagree SELECT * FROM numbers(2)') with pytest.raises(QueryRuntimeException, match='.*Hash mismatch.*'): n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure_disagree dist_secure_disagree') diff --git a/tests/integration/test_max_http_connections_for_replication/test.py b/tests/integration/test_max_http_connections_for_replication/test.py index 2dc4e2a8810..634697c8668 100644 --- a/tests/integration/test_max_http_connections_for_replication/test.py +++ b/tests/integration/test_max_http_connections_for_replication/test.py @@ -43,6 +43,8 @@ def start_small_cluster(): def test_single_endpoint_connections_count(start_small_cluster): + node1.query("TRUNCATE TABLE test_table") + node2.query("SYSTEM SYNC REPLICA test_table") def task(count): print(("Inserting ten times from {}".format(count))) for i in range(count, count + 10): @@ -58,9 +60,11 @@ def test_single_endpoint_connections_count(start_small_cluster): def test_keepalive_timeout(start_small_cluster): - current_count = int(node1.query("select count() from test_table").strip()) + node1.query("TRUNCATE TABLE test_table") + node2.query("SYSTEM SYNC REPLICA test_table") + node1.query("insert into test_table values ('2017-06-16', 777, 0)") - assert_eq_with_retry(node2, "select count() from test_table", str(current_count + 1)) + assert_eq_with_retry(node2, "select count() from test_table", str(1)) # Server keepAliveTimeout is 3 seconds, default client session timeout is 8 # lets sleep in that interval time.sleep(4) @@ -69,7 +73,7 @@ def test_keepalive_timeout(start_small_cluster): time.sleep(3) - assert_eq_with_retry(node2, "select count() from test_table", str(current_count + 2)) + assert_eq_with_retry(node2, "select count() from test_table", str(2)) assert not node2.contains_in_log("No message received"), "Found 'No message received' in clickhouse-server.log" diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 6bb6a6ee777..2ef71927bdf 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -74,6 +74,9 @@ def started_cluster(): node1.exec_in_container( ["bash", "-c", "echo 'CREATE TABLE t4(X INTEGER PRIMARY KEY ASC, Y, Z);' | sqlite3 {}".format(sqlite_db)], privileged=True, user='root') + node1.exec_in_container( + 
["bash", "-c", "echo 'CREATE TABLE tf1(x INTEGER PRIMARY KEY ASC, y, z);' | sqlite3 {}".format(sqlite_db)], + privileged=True, user='root') print("sqlite tables created") mysql_conn = get_mysql_conn() print("mysql connection received") @@ -177,6 +180,21 @@ def test_sqlite_simple_select_function_works(started_cluster): assert node1.query( "select count(), sum(x) from odbc('DSN={}', '{}') group by x".format(sqlite_setup["DSN"], 't1')) == "1\t1\n" +def test_sqlite_table_function(started_cluster): + sqlite_setup = node1.odbc_drivers["SQLite3"] + sqlite_db = sqlite_setup["Database"] + + node1.exec_in_container(["bash", "-c", "echo 'INSERT INTO tf1 values(1, 2, 3);' | sqlite3 {}".format(sqlite_db)], + privileged=True, user='root') + node1.query("create table odbc_tf as odbc('DSN={}', '{}')".format(sqlite_setup["DSN"], 'tf1')) + assert node1.query("select * from odbc_tf") == "1\t2\t3\n" + + assert node1.query("select y from odbc_tf") == "2\n" + assert node1.query("select z from odbc_tf") == "3\n" + assert node1.query("select x from odbc_tf") == "1\n" + assert node1.query("select x, y from odbc_tf") == "1\t2\n" + assert node1.query("select z, x, y from odbc_tf") == "3\t1\t2\n" + assert node1.query("select count(), sum(x) from odbc_tf group by x") == "1\t1\n" def test_sqlite_simple_select_storage_works(started_cluster): sqlite_setup = node1.odbc_drivers["SQLite3"] @@ -342,6 +360,7 @@ def test_bridge_dies_with_parent(started_cluster): assert clickhouse_pid is None assert bridge_pid is None + node1.start_clickhouse(20) def test_odbc_postgres_date_data_type(started_cluster): diff --git a/tests/integration/test_secure_socket/__init__.py b/tests/integration/test_secure_socket/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_secure_socket/configs_secure/config.d/remote_servers.xml b/tests/integration/test_secure_socket/configs_secure/config.d/remote_servers.xml new file mode 100644 index 00000000000..0c109d6d768 --- /dev/null +++ b/tests/integration/test_secure_socket/configs_secure/config.d/remote_servers.xml @@ -0,0 +1,14 @@ + + 9440 + + + + + node2 + 9440 + 1 + + + + + diff --git a/tests/integration/test_secure_socket/configs_secure/config.d/ssl_conf.xml b/tests/integration/test_secure_socket/configs_secure/config.d/ssl_conf.xml new file mode 100644 index 00000000000..fe39e3712b8 --- /dev/null +++ b/tests/integration/test_secure_socket/configs_secure/config.d/ssl_conf.xml @@ -0,0 +1,18 @@ + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + /etc/clickhouse-server/config.d/dhparam.pem + none + true + + + true + none + + AcceptCertificateHandler + + + + diff --git a/tests/integration/test_secure_socket/configs_secure/dhparam.pem b/tests/integration/test_secure_socket/configs_secure/dhparam.pem new file mode 100644 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/integration/test_secure_socket/configs_secure/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 +Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb +9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_secure_socket/configs_secure/server.crt b/tests/integration/test_secure_socket/configs_secure/server.crt new 
file mode 100644 index 00000000000..7ade2d96273 --- /dev/null +++ b/tests/integration/test_secure_socket/configs_secure/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIC/TCCAeWgAwIBAgIJANjx1QSR77HBMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAgFw0xODA3MzAxODE2MDhaGA8yMjkyMDUxNDE4MTYwOFow +FDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAs9uSo6lJG8o8pw0fbVGVu0tPOljSWcVSXH9uiJBwlZLQnhN4SFSFohfI +4K8U1tBDTnxPLUo/V1K9yzoLiRDGMkwVj6+4+hE2udS2ePTQv5oaMeJ9wrs+5c9T +4pOtlq3pLAdm04ZMB1nbrEysceVudHRkQbGHzHp6VG29Fw7Ga6YpqyHQihRmEkTU +7UCYNA+Vk7aDPdMS/khweyTpXYZimaK9f0ECU3/VOeG3fH6Sp2X6FN4tUj/aFXEj +sRmU5G2TlYiSIUMF2JPdhSihfk1hJVALrHPTU38SOL+GyyBRWdNcrIwVwbpvsvPg +pryMSNxnpr0AK0dFhjwnupIv5hJIOQIDAQABo1AwTjAdBgNVHQ4EFgQUjPLb3uYC +kcamyZHK4/EV8jAP0wQwHwYDVR0jBBgwFoAUjPLb3uYCkcamyZHK4/EV8jAP0wQw +DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAM/ocuDvfPus/KpMVD51j +4IdlU8R0vmnYLQ+ygzOAo7+hUWP5j0yvq4ILWNmQX6HNvUggCgFv9bjwDFhb/5Vr +85ieWfTd9+LTjrOzTw4avdGwpX9G+6jJJSSq15tw5ElOIFb/qNA9O4dBiu8vn03C +L/zRSXrARhSqTW5w/tZkUcSTT+M5h28+Lgn9ysx4Ff5vi44LJ1NnrbJbEAIYsAAD ++UA+4MBFKx1r6hHINULev8+lCfkpwIaeS8RL+op4fr6kQPxnULw8wT8gkuc8I4+L +P9gg/xDHB44T3ADGZ5Ib6O0DJaNiToO6rnoaaxs0KkotbvDWvRoxEytSbXKoYjYp +0g== +-----END CERTIFICATE----- diff --git a/tests/integration/test_secure_socket/configs_secure/server.key b/tests/integration/test_secure_socket/configs_secure/server.key new file mode 100644 index 00000000000..f0fb61ac443 --- /dev/null +++ b/tests/integration/test_secure_socket/configs_secure/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCz25KjqUkbyjyn +DR9tUZW7S086WNJZxVJcf26IkHCVktCeE3hIVIWiF8jgrxTW0ENOfE8tSj9XUr3L +OguJEMYyTBWPr7j6ETa51LZ49NC/mhox4n3Cuz7lz1Pik62WreksB2bThkwHWdus +TKxx5W50dGRBsYfMenpUbb0XDsZrpimrIdCKFGYSRNTtQJg0D5WTtoM90xL+SHB7 +JOldhmKZor1/QQJTf9U54bd8fpKnZfoU3i1SP9oVcSOxGZTkbZOViJIhQwXYk92F +KKF+TWElUAusc9NTfxI4v4bLIFFZ01ysjBXBum+y8+CmvIxI3GemvQArR0WGPCe6 +ki/mEkg5AgMBAAECggEATrbIBIxwDJOD2/BoUqWkDCY3dGevF8697vFuZKIiQ7PP +TX9j4vPq0DfsmDjHvAPFkTHiTQXzlroFik3LAp+uvhCCVzImmHq0IrwvZ9xtB43f +7Pkc5P6h1l3Ybo8HJ6zRIY3TuLtLxuPSuiOMTQSGRL0zq3SQ5DKuGwkz+kVjHXUN +MR2TECFwMHKQ5VLrC+7PMpsJYyOMlDAWhRfUalxC55xOXTpaN8TxNnwQ8K2ISVY5 +212Jz/a4hn4LdwxSz3Tiu95PN072K87HLWx3EdT6vW4Ge5P/A3y+smIuNAlanMnu +plHBRtpATLiTxZt/n6npyrfQVbYjSH7KWhB8hBHtaQKBgQDh9Cq1c/KtqDtE0Ccr +/r9tZNTUwBE6VP+3OJeKdEdtsfuxjOCkS1oAjgBJiSDOiWPh1DdoDeVZjPKq6pIu +Mq12OE3Doa8znfCXGbkSzEKOb2unKZMJxzrz99kXt40W5DtrqKPNb24CNqTiY8Aa +CjtcX+3weat82VRXvph6U8ltMwKBgQDLxjiQQzNoY7qvg7CwJCjf9qq8jmLK766g +1FHXopqS+dTxDLM8eJSRrpmxGWJvNeNc1uPhsKsKgotqAMdBUQTf7rSTbt4MyoH5 +bUcRLtr+0QTK9hDWMOOvleqNXha68vATkohWYfCueNsC60qD44o8RZAS6UNy3ENq +cM1cxqe84wKBgQDKkHutWnooJtajlTxY27O/nZKT/HA1bDgniMuKaz4R4Gr1PIez +on3YW3V0d0P7BP6PWRIm7bY79vkiMtLEKdiKUGWeyZdo3eHvhDb/3DCawtau8L2K +GZsHVp2//mS1Lfz7Qh8/L/NedqCQ+L4iWiPnZ3THjjwn3CoZ05ucpvrAMwKBgB54 +nay039MUVq44Owub3KDg+dcIU62U+cAC/9oG7qZbxYPmKkc4oL7IJSNecGHA5SbU +2268RFdl/gLz6tfRjbEOuOHzCjFPdvAdbysanpTMHLNc6FefJ+zxtgk9sJh0C4Jh +vxFrw9nTKKzfEl12gQ1SOaEaUIO0fEBGbe8ZpauRAoGAMAlGV+2/K4ebvAJKOVTa +dKAzQ+TD2SJmeR1HZmKDYddNqwtZlzg3v4ZhCk4eaUmGeC1Bdh8MDuB3QQvXz4Dr +vOIP4UVaOr+uM+7TgAgVnP4/K6IeJGzUDhX93pmpWhODfdu/oojEKVcpCojmEmS1 +KCBtmIrQLqzMpnBpLNuSY+Q= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_secure_socket/configs_secure/users.d/users.xml b/tests/integration/test_secure_socket/configs_secure/users.d/users.xml new file mode 100644 index 00000000000..479017f6370 --- /dev/null +++ 
b/tests/integration/test_secure_socket/configs_secure/users.d/users.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/integration/test_secure_socket/test.py b/tests/integration/test_secure_socket/test.py new file mode 100644 index 00000000000..c2bad80bca0 --- /dev/null +++ b/tests/integration/test_secure_socket/test.py @@ -0,0 +1,84 @@ +import os.path +import time + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) + +NODES = {'node' + str(i): None for i in (1, 2)} + +config = ''' + + + {sleep_in_send_data_ms} + + +''' + + +@pytest.fixture(scope="module") +def started_cluster(): + cluster.__with_ssl_config = True + main_configs = [ + "configs_secure/config.d/remote_servers.xml", + "configs_secure/server.crt", + "configs_secure/server.key", + "configs_secure/dhparam.pem", + "configs_secure/config.d/ssl_conf.xml", + ] + + NODES['node1'] = cluster.add_instance('node1', main_configs=main_configs) + NODES['node2'] = cluster.add_instance('node2', main_configs=main_configs, user_configs=["configs_secure/users.d/users.xml"]) + + try: + cluster.start() + NODES['node2'].query("CREATE TABLE base_table (x UInt64) ENGINE = MergeTree ORDER BY x;") + NODES['node2'].query("INSERT INTO base_table VALUES (5);") + NODES['node1'].query("CREATE TABLE distributed_table (x UInt64) ENGINE = Distributed(test_cluster, default, base_table);") + + yield cluster + + finally: + cluster.shutdown() + + +def test(started_cluster): + NODES['node2'].replace_config('/etc/clickhouse-server/users.d/users.xml', config.format(sleep_in_send_data_ms=1000000)) + + attempts = 0 + while attempts < 1000: + setting = NODES['node2'].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data_ms'") + if int(setting) == 1000000: + break + time.sleep(0.1) + attempts += 1 + + assert attempts < 1000 + + + start = time.time() + NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, use_hedged_requests=0, async_socket_for_remote=0;') + end = time.time() + assert end - start < 10 + + start = time.time() + error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, use_hedged_requests=0;') + end = time.time() + + assert end - start < 10 + + # Check that exception about timeout wasn't thrown from DB::ReadBufferFromPocoSocket::nextImpl(). + assert error.find('DB::ReadBufferFromPocoSocket::nextImpl()') == -1 + + start = time.time() + error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5;') + end = time.time() + + assert end - start < 10 + + # Check that exception about timeout wasn't thrown from DB::ReadBufferFromPocoSocket::nextImpl(). 
+ assert error.find('DB::ReadBufferFromPocoSocket::nextImpl()') == -1 + + diff --git a/tests/integration/test_system_clusters_actual_information/configs/users.xml b/tests/integration/test_system_clusters_actual_information/configs/users.xml deleted file mode 100644 index 156cd3a6b59..00000000000 --- a/tests/integration/test_system_clusters_actual_information/configs/users.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - 5 - - - diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 389e249790f..67614b88029 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -396,6 +396,10 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") + node_left.query("SYSTEM SYNC REPLICA test_ttl_delete", timeout=20) + node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) + node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" diff --git a/tests/jepsen.nukeeper/.gitignore b/tests/jepsen.nukeeper/.gitignore new file mode 100644 index 00000000000..d956ab0a125 --- /dev/null +++ b/tests/jepsen.nukeeper/.gitignore @@ -0,0 +1,13 @@ +/target +/classes +/checkouts +profiles.clj +pom.xml +pom.xml.asc +*.jar +*.class +/.lein-* +/.nrepl-port +/.prepl-port +.hgignore +.hg/ diff --git a/tests/jepsen.nukeeper/LICENSE b/tests/jepsen.nukeeper/LICENSE new file mode 100644 index 00000000000..231512650b9 --- /dev/null +++ b/tests/jepsen.nukeeper/LICENSE @@ -0,0 +1,280 @@ +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. 
+ +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. + + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. + + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). + +3. 
REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and + + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and + + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. + +3.2 When the Program is Distributed as Source Code: + + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and + + b) a copy of this Agreement must be included with each copy of + the Program. + +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. 
In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. 
If all Recipient's
+rights under this Agreement terminate, Recipient agrees to cease use
+and distribution of the Program as soon as reasonably practicable.
+However, Recipient's obligations under this Agreement and any licenses
+granted by Recipient relating to the Program shall continue and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement,
+but in order to avoid inconsistency the Agreement is copyrighted and
+may only be modified in the following manner. The Agreement Steward
+reserves the right to publish new versions (including revisions) of
+this Agreement from time to time. No one other than the Agreement
+Steward has the right to modify this Agreement. The Eclipse Foundation
+is the initial Agreement Steward. The Eclipse Foundation may assign the
+responsibility to serve as the Agreement Steward to a suitable separate
+entity. Each new version of the Agreement will be given a distinguishing
+version number. The Program (including Contributions) may always be
+Distributed subject to the version of the Agreement under which it was
+received. In addition, after a new version of the Agreement is published,
+Contributor may elect to Distribute the Program (including its
+Contributions) under the new version.
+
+Except as expressly stated in Sections 2(a) and 2(b) above, Recipient
+receives no rights or licenses to the intellectual property of any
+Contributor under this Agreement, whether expressly, by implication,
+estoppel or otherwise. All rights in the Program not expressly granted
+under this Agreement are reserved. Nothing in this Agreement is intended
+to be enforceable by any entity that is not a Contributor or Recipient.
+No third-party beneficiary rights are created under this Agreement.
+
+Exhibit A - Form of Secondary Licenses Notice
+
+"This Source Code may also be made available under the following
+Secondary Licenses when the conditions for such availability set forth
+in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public
+License as published by the Free Software Foundation, either version 2
+of the License, or (at your option) any later version, with the GNU
+Classpath Exception which is available at
+https://www.gnu.org/software/classpath/license.html."
+
+  Simply including a copy of this Agreement, including this Exhibit A
+  is not sufficient to license the Source Code under Secondary Licenses.
+
+  If it is not possible or desirable to put the notice in a particular
+  file, then You may include the notice in a location (such as a LICENSE
+  file in a relevant directory) where a recipient would be likely to
+  look for such a notice.
+
+  You may add additional accurate notices of copyright ownership.
diff --git a/tests/jepsen.nukeeper/README.md b/tests/jepsen.nukeeper/README.md
new file mode 100644
index 00000000000..8f3754b8f7b
--- /dev/null
+++ b/tests/jepsen.nukeeper/README.md
@@ -0,0 +1,155 @@
+# Jepsen tests for ClickHouse Keeper
+
+A Clojure library designed to test the ZooKeeper-like implementation inside ClickHouse.
+
+## Test scenarios (workloads)
+
+### CAS register
+
+The CAS register has three operations: read a number, write a number, and compare-and-swap a number. The register is simulated as a single ZooKeeper node. A read translates into ZooKeeper's `getData` request and a write into the `set` request. Compare-and-swap is implemented as `getData`, a comparison in code, then a `set` of the new value with the `version` received from `getData`; a sketch of this step follows below.
+
+In this test, we use a linearizable checker, so Jepsen validates that the history was linearizable. This is one of the heaviest workloads.
+
+Strictly requires `quorum_reads` to be true.
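+
+A minimal sketch of the compare-and-swap step, mirroring the `zk-cas` helper that this patch adds in `src/jepsen/nukeeper/utils.clj` (`zk-get-str`, `zk-set`, and `parse-long` are small helpers from the same namespace, thin wrappers around zookeeper-clj):
+
+```clojure
+(defn zk-cas
+  "Optimistic CAS: read the node, compare in code, then write back with
+  the version we read. A concurrent writer bumps the version, so our
+  versioned set fails instead of clobbering the newer value."
+  [conn path old-value new-value]
+  (let [current (zk-get-str conn path)]
+    (when (= (parse-long (:data current)) old-value)
+      (zk-set conn path new-value (:version (:stat current)))
+      true)))
+```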
+
+### Set
+
+Set has two operations: add a number to the set and read all values from the set. This workload is simulated on a single ZooKeeper node with a string value that represents a Clojure set data structure. The add operation is very similar to compare-and-swap: we read the string value from the ZooKeeper node with `getData`, parse it into a Clojure set, add the new value to the set, and try to write it back with the received version.
+
+In this test, Jepsen validates that all successfully added values can be read. The generator for this workload performs only add operations until a timeout, and after that tries to read the set once.
+
+### Unique IDs
+
+In the Unique IDs workload we have only one operation: generate a new unique number. It is implemented using ZooKeeper's sequential nodes. For each generate request, the client just creates a new sequential node in ZooKeeper with a fixed prefix, then cuts the prefix off the returned path and parses the number from the remaining part.
+
+Jepsen checks that all returned IDs were unique.
+
+### Counter
+
+The counter workload has two operations: read the counter value and add some number to the counter. Its implementation is quite weird. We add a number `N` to the counter by creating `N` sequential nodes in a single ZooKeeper transaction. A counter read is implemented as a `getChildren` ZooKeeper request plus a count of all returned nodes.
+
+Jepsen checks that the counter value lies in the interval of possible values. Strictly requires `quorum_reads` to be true.
+
+### Total queue
+
+Simulates an unordered queue with three operations: enqueue a number, dequeue, and drain. The enqueue operation uses a `create` request with the node name equal to the number. The `dequeue` operation is more interesting. We list (`getChildren`) all nodes and remember the parent node version. After that we choose the smallest one and prepare the transaction: `check` the parent node version + `set` an empty value on the parent node + `delete` the smallest child node. The drain operation is just `getChildren` on the parent path.
+
+Jepsen checks that all enqueued values were dequeued or drained. Duplicates are allowed because Jepsen doesn't know the outcome of an unknown-status (`:info`) dequeue operation. So when we try to `dequeue` some element, we should return it even if our delete transaction failed with a `Connection loss` error.
+
+### Linear queue
+
+Same as the total queue, but without the drain operation. Checks linearizability between enqueue and dequeue. Validation sometimes consumes more than 10GB of memory even for very short histories.
+
+
+## Nemesis
+
+We use almost all standard nemeses with small changes for our storage.
+
+### Random node killer (random-node-killer)
+
+Sleeps for 5 seconds, kills a random node, sleeps for 5 more seconds, and starts it back.
+
+### All nodes killer (all-nodes-killer)
+
+Kills all nodes at once, sleeps for 5 seconds, and starts them back.
+
+### Simple partitioner (simple-partitioner)
+
+Partitions one node from the others using iptables. No one can see the victim and the victim cannot see anybody.
+
+### Random node stop (random-node-hammer-time)
+
+Sends `SIGSTOP` to a random node, sleeps for 5 seconds, then sends `SIGCONT`.
+
+### All nodes stop (all-nodes-hammer-time)
+
+Sends `SIGSTOP` to all nodes, sleeps for 5 seconds, then sends `SIGCONT`.
+
+### Logs corruptor (logs-corruptor)
+
+Corrupts the latest log (changes one random byte) in `clickhouse_path/coordination/logs`, then restarts the nodes. The corruption primitive is sketched below.
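+
+A sketch of that single-byte corruption, mirroring the `corrupt-file` and `random-file-pos` helpers this patch adds in `src/jepsen/nukeeper/nemesis.clj` (`c/exec` runs the command on the victim node over SSH; logging and the empty-filename guard are omitted here):
+
+```clojure
+(defn random-file-pos
+  "Pick a random byte offset inside the file."
+  [fname]
+  (rand-int (Integer/parseInt (c/exec :du :-b fname :| :cut :-f1))))
+
+(defn corrupt-file
+  "Overwrite one byte at a random offset with a zero byte,
+  without truncating the file."
+  [fname]
+  (c/exec :dd "if=/dev/zero" (str "of=" fname)
+          "bs=1" "count=1"
+          (str "seek=" (random-file-pos fname)) "conv=notrunc"))
+```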
+
+### Snapshots corruptor (snapshots-corruptor)
+
+Corrupts the latest snapshot (changes one random byte) in `clickhouse_path/coordination/snapshots`, then restarts the nodes.
+
+### Logs and snapshots corruptor (logs-and-snapshots-corruptor)
+
+Corrupts both the latest log and the latest snapshot, then restarts the nodes.
+
+### Drop data corruptor (drop-data-corruptor)
+
+Drops all data from `clickhouse_path/coordination`, then restarts the nodes.
+
+### Bridge partitioner (bridge-partitioner)
+
+Two nodes cannot see each other, but each can see the last node, and the last node can see both of them.
+
+### Blind node partitioner (blind-node-partitioner)
+
+One of the nodes cannot see the others, but they can see it.
+
+### Blind others partitioner (blind-others-partitioner)
+
+Two nodes cannot see one node, but that node can see both of them.
+
+## Usage
+
+### Dependencies
+
+- leiningen (https://leiningen.org/)
+- clojure (https://clojure.org/)
+- JVM
+
+### Options for `lein run`
+
+- `test` Run a single test.
+- `test-all` Run all available tests from the test set.
+- `-w (--workload)` One of the workloads. Option for a single `test`.
+- `--nemesis` One of the nemeses. Option for a single `test`.
+- `-q (--quorum)` Run the test with quorum reads.
+- `-r (--rate)` How many operations per second Jepsen will generate in a single thread.
+- `-s (--snapshot-distance)` ClickHouse Keeper setting. How often we will create a new snapshot.
+- `--stale-log-gap` ClickHouse Keeper setting. The leader will send a snapshot instead of logs to a node whose committed index is behind the leader's by more than this value.
+- `--reserved-log-items` ClickHouse Keeper setting. How many log items to keep after the snapshot.
+- `--ops-per-key` Option for the CAS register workload. The total number of operations that will be generated for a single register.
+- `--lightweight-run` Run some lightweight tests without linearizability checks. Option for a `test-all` run.
+- `--reuse-binary` Don't download the clickhouse binary if it already exists on the node.
+- `--clickhouse-source` URL to a clickhouse `.deb`, `.tgz` or binary.
+- `--time-limit` (in seconds) How long Jepsen will generate new operations.
+- `--nodes-file` File with nodes for SSH. Newline separated.
+- `--username` SSH username for nodes.
+- `--password` SSH password for nodes.
+- `--concurrency` How many threads Jepsen will use for concurrent requests.
+- `--test-count` How many times to run a single test, or how many tests to run from the test set.
+
+
+### Examples:
+
+1. Run the `Set` workload with `logs-and-snapshots-corruptor` ten times:
+
+```sh
+$ lein run test --nodes-file nodes.txt --username root --password '' --time-limit 30 --concurrency 50 -r 50 --workload set --nemesis logs-and-snapshots-corruptor --clickhouse-source 'https://clickhouse-builds.s3.yandex.net/someurl/clickhouse-common-static_21.4.1.6321_amd64.deb' -q --test-count 10 --reuse-binary
+```
+
+2. Run ten random tests from the `--lightweight-run` subset with some custom Keeper settings:
+
+```sh
+$ lein run test-all --nodes-file nodes.txt --username root --password '' --time-limit 30 --concurrency 50 -r 50 --snapshot-distance 100 --stale-log-gap 100 --reserved-log-items 10 --lightweight-run --clickhouse-source 'someurl' -q --reuse-binary --test-count 10
+```
+
+
+## License
+
+Copyright © 2021 FIXME
+
+This program and the accompanying materials are made available under the
+terms of the Eclipse Public License 2.0 which is available at
+http://www.eclipse.org/legal/epl-2.0.
+
+This Source Code may also be made available under the following Secondary
+Licenses when the conditions for such availability set forth in the Eclipse
+Public License, v. 2.0 are satisfied: GNU General Public License as published by
+the Free Software Foundation, either version 2 of the License, or (at your
+option) any later version, with the GNU Classpath Exception which is available
+at https://www.gnu.org/software/classpath/license.html.
diff --git a/tests/jepsen.nukeeper/doc/intro.md b/tests/jepsen.nukeeper/doc/intro.md
new file mode 100644
index 00000000000..c6e5ccbd04a
--- /dev/null
+++ b/tests/jepsen.nukeeper/doc/intro.md
@@ -0,0 +1,3 @@
+# Introduction to jepsen.nukeeper
+
+TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
diff --git a/tests/jepsen.nukeeper/project.clj b/tests/jepsen.nukeeper/project.clj
new file mode 100644
index 00000000000..e7150c9e5d4
--- /dev/null
+++ b/tests/jepsen.nukeeper/project.clj
@@ -0,0 +1,13 @@
+(defproject jepsen.nukeeper "0.1.0-SNAPSHOT"
+  :injections [(.. System (setProperty "zookeeper.request.timeout" "10000"))]
+  :description "Jepsen tests for ClickHouse NuKeeper"
+  :url "https://clickhouse.tech/"
+  :license {:name "EPL-2.0"
+            :url "https://www.eclipse.org/legal/epl-2.0/"}
+  :main jepsen.nukeeper.main
+  :plugins [[lein-cljfmt "0.7.0"]]
+  :dependencies [[org.clojure/clojure "1.10.1"]
+                 [jepsen "0.2.3"]
+                 [zookeeper-clj "0.9.4"]
+                 [org.apache.zookeeper/zookeeper "3.6.1" :exclusions [org.slf4j/slf4j-log4j12]]]
+  :repl-options {:init-ns jepsen.nukeeper.main})
diff --git a/tests/jepsen.nukeeper/resources/config.xml b/tests/jepsen.nukeeper/resources/config.xml
new file mode 120000
index 00000000000..c7596baa075
--- /dev/null
+++ b/tests/jepsen.nukeeper/resources/config.xml
@@ -0,0 +1 @@
+../../../programs/server/config.xml
\ No newline at end of file
diff --git a/tests/jepsen.nukeeper/resources/listen.xml b/tests/jepsen.nukeeper/resources/listen.xml
new file mode 100644
index 00000000000..de8c737ff75
--- /dev/null
+++ b/tests/jepsen.nukeeper/resources/listen.xml
@@ -0,0 +1,3 @@
+<yandex>
+    <listen_host>::</listen_host>
+</yandex>
diff --git a/tests/jepsen.nukeeper/resources/test_keeper_config.xml b/tests/jepsen.nukeeper/resources/test_keeper_config.xml
new file mode 100644
index 00000000000..c69fb0f228c
--- /dev/null
+++ b/tests/jepsen.nukeeper/resources/test_keeper_config.xml
@@ -0,0 +1,36 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>{id}</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>10000</operation_timeout_ms>
+            <session_timeout_ms>30000</session_timeout_ms>
+            <force_sync>false</force_sync>
+            <startup_timeout>120000</startup_timeout>
+            <raft_logs_level>trace</raft_logs_level>
+            <quorum_reads>{quorum_reads}</quorum_reads>
+            <snapshot_distance>{snapshot_distance}</snapshot_distance>
+            <stale_log_gap>{stale_log_gap}</stale_log_gap>
+            <reserved_log_items>{reserved_log_items}</reserved_log_items>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>{srv1}</hostname>
+                <port>9444</port>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>{srv2}</hostname>
+                <port>9444</port>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>{srv3}</hostname>
+                <port>9444</port>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/jepsen.nukeeper/resources/users.xml b/tests/jepsen.nukeeper/resources/users.xml
new file mode 120000
index 00000000000..41b137a130f
--- /dev/null
+++ b/tests/jepsen.nukeeper/resources/users.xml
@@ -0,0 +1 @@
+../../../programs/server/users.xml
\ No newline at end of file
diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/constants.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/constants.clj
new file mode 100644
index 00000000000..d6245d450f5
--- /dev/null
+++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/constants.clj
@@ -0,0 +1,18 @@
+(ns jepsen.nukeeper.constants)
+
+(def common-prefix "/home/robot-clickhouse")
+
+(def binary-name "clickhouse")
+
+(def binary-path (str common-prefix "/" binary-name))
+(def pid-file-path (str common-prefix "/clickhouse.pid"))
+
+(def data-dir (str common-prefix "/db"))
+(def logs-dir (str common-prefix "/logs"))
+(def configs-dir (str common-prefix "/config"))
+(def sub-configs-dir (str configs-dir "/config.d"))
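+
+;; Coordination state (Raft logs and snapshots) lives under the data dir;
+;; these are the paths that the corruptor nemeses target.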
configs-dir "/config.d")) +(def coordination-data-dir (str data-dir "/coordination")) +(def coordination-snapshots-dir (str coordination-data-dir "/snapshots")) +(def coordination-logs-dir (str coordination-data-dir "/logs")) + +(def stderr-file (str logs-dir "/stderr.log")) diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/counter.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/counter.clj new file mode 100644 index 00000000000..b426a8ea90d --- /dev/null +++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/counter.clj @@ -0,0 +1,50 @@ +(ns jepsen.nukeeper.counter + (:require + [clojure.tools.logging :refer :all] + [jepsen + [checker :as checker] + [client :as client] + [generator :as gen]] + [jepsen.nukeeper.utils :refer :all] + [zookeeper :as zk]) + (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException))) + +(defn r [_ _] {:type :invoke, :f :read}) +(defn add [_ _] {:type :invoke, :f :add, :value (rand-int 5)}) + +(defrecord CounterClient [conn nodename] + client/Client + (open! [this test node] + (assoc + (assoc this + :conn (zk-connect node 9181 30000)) + :nodename node)) + + (setup! [this test]) + + (invoke! [this test op] + (case (:f op) + :read (exec-with-retries 30 (fn [] + (assoc op + :type :ok + :value (count (zk-list conn "/"))))) + :add (try + (do + (zk-multi-create-many-seq-nodes conn "/seq-" (:value op)) + (assoc op :type :ok)) + (catch Exception _ (assoc op :type :info, :error :connect-error))))) + + (teardown! [_ test]) + + (close! [_ test] + (zk/close conn))) + +(defn workload + "A generator, client, and checker for a set test." + [opts] + {:client (CounterClient. nil nil) + :checker (checker/counter) + :generator (->> (range) + (map (fn [x] + (->> (gen/mix [r add]))))) + :final-generator (gen/once {:type :invoke, :f :read, :value nil})}) diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/db.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/db.clj new file mode 100644 index 00000000000..d82d628cc95 --- /dev/null +++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/db.clj @@ -0,0 +1,128 @@ +(ns jepsen.nukeeper.db + (:require [clojure.tools.logging :refer :all] + [jepsen + [control :as c] + [db :as db] + [util :as util :refer [meh]]] + [jepsen.nukeeper.constants :refer :all] + [jepsen.nukeeper.utils :refer :all] + [clojure.java.io :as io] + [jepsen.control.util :as cu] + [jepsen.os.ubuntu :as ubuntu])) + +(defn get-clickhouse-sky + [version] + (c/exec :sky :get :-d common-prefix :-N :Backbone version) + (str common-prefix "/clickhouse")) + +(defn get-clickhouse-url + [url] + (let [download-result (cu/wget! url)] + (do (c/exec :mv download-result common-prefix) + (str common-prefix "/" download-result)))) + +(defn download-clickhouse + [source] + (info "Downloading clickhouse from" source) + (cond + (clojure.string/starts-with? source "rbtorrent:") (get-clickhouse-sky source) + (clojure.string/starts-with? source "http") (get-clickhouse-url source) + :else (throw (Exception. 
(str "Don't know how to download clickhouse from" source))))) + +(defn unpack-deb + [path] + (do + (c/exec :dpkg :-x path common-prefix) + (c/exec :rm :-f path) + (c/exec :mv (str common-prefix "/usr/bin/clickhouse") common-prefix) + (c/exec :rm :-rf (str common-prefix "/usr") (str common-prefix "/etc")))) + +(defn unpack-tgz + [path] + (do + (c/exec :mkdir :-p (str common-prefix "/unpacked")) + (c/exec :tar :-zxvf path :-C (str common-prefix "/unpacked")) + (c/exec :rm :-f path) + (let [subdir (c/exec :ls (str common-prefix "/unpacked"))] + (c/exec :mv (str common-prefix "/unpacked/" subdir "/usr/bin/clickhouse") common-prefix) + (c/exec :rm :-fr (str common-prefix "/unpacked"))))) + +(defn chmod-binary + [path] + (c/exec :chmod :+x path)) + +(defn install-downloaded-clickhouse + [path] + (cond + (clojure.string/ends-with? path ".deb") (unpack-deb path) + (clojure.string/ends-with? path ".tgz") (unpack-tgz path) + (clojure.string/ends-with? path "clickhouse") (chmod-binary path) + :else (throw (Exception. (str "Don't know how to install clickhouse from path" path))))) + +(defn prepare-dirs + [] + (do + (c/exec :mkdir :-p common-prefix) + (c/exec :mkdir :-p data-dir) + (c/exec :mkdir :-p logs-dir) + (c/exec :mkdir :-p configs-dir) + (c/exec :mkdir :-p sub-configs-dir) + (c/exec :touch stderr-file) + (c/exec :chown :-R :root common-prefix))) + +(defn cluster-config + [test node config-template] + (let [nodes (:nodes test) + replacement-map {#"\{srv1\}" (get nodes 0) + #"\{srv2\}" (get nodes 1) + #"\{srv3\}" (get nodes 2) + #"\{id\}" (str (inc (.indexOf nodes node))) + #"\{quorum_reads\}" (str (boolean (:quorum test))) + #"\{snapshot_distance\}" (str (:snapshot-distance test)) + #"\{stale_log_gap\}" (str (:stale-log-gap test)) + #"\{reserved_log_items\}" (str (:reserved-log-items test))}] + (reduce #(clojure.string/replace %1 (get %2 0) (get %2 1)) config-template replacement-map))) + +(defn install-configs + [test node] + (c/exec :echo (slurp (io/resource "config.xml")) :> (str configs-dir "/config.xml")) + (c/exec :echo (slurp (io/resource "users.xml")) :> (str configs-dir "/users.xml")) + (c/exec :echo (slurp (io/resource "listen.xml")) :> (str sub-configs-dir "/listen.xml")) + (c/exec :echo (cluster-config test node (slurp (io/resource "test_keeper_config.xml"))) :> (str sub-configs-dir "/test_keeper_config.xml"))) + +(defn db + [version reuse-binary] + (reify db/DB + (setup! [_ test node] + (c/su + (do + (info "Preparing directories") + (prepare-dirs) + (if (or (not (cu/exists? binary-path)) (not reuse-binary)) + (do (info "Downloading clickhouse") + (install-downloaded-clickhouse (download-clickhouse version))) + (info "Binary already exsist on path" binary-path "skipping download")) + (info "Installing configs") + (install-configs test node) + (info "Starting server") + (start-clickhouse! node test) + (info "ClickHouse started")))) + + (teardown! [_ test node] + (info node "Tearing down clickhouse") + (kill-clickhouse! node test) + (c/su + (if (not reuse-binary) + (c/exec :rm :-rf binary-path)) + (c/exec :rm :-rf pid-file-path) + (c/exec :rm :-rf data-dir) + (c/exec :rm :-rf logs-dir) + (c/exec :rm :-rf configs-dir))) + + db/LogFiles + (log-files [_ test node] + (c/su + (kill-clickhouse! 
node test) + (c/cd data-dir + (c/exec :tar :czf "coordination.tar.gz" "coordination"))) + [stderr-file (str logs-dir "/clickhouse-server.log") (str data-dir "/coordination.tar.gz")]))) diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/main.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/main.clj new file mode 100644 index 00000000000..b9439097e85 --- /dev/null +++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/main.clj @@ -0,0 +1,159 @@ +(ns jepsen.nukeeper.main + (:require [clojure.tools.logging :refer :all] + [jepsen.nukeeper.utils :refer :all] + [clojure.pprint :refer [pprint]] + [jepsen.nukeeper.set :as set] + [jepsen.nukeeper.db :refer :all] + [jepsen.nukeeper.nemesis :as custom-nemesis] + [jepsen.nukeeper.register :as register] + [jepsen.nukeeper.unique :as unique] + [jepsen.nukeeper.queue :as queue] + [jepsen.nukeeper.counter :as counter] + [jepsen.nukeeper.constants :refer :all] + [clojure.string :as str] + [jepsen + [checker :as checker] + [cli :as cli] + [client :as client] + [control :as c] + [db :as db] + [nemesis :as nemesis] + [generator :as gen] + [independent :as independent] + [tests :as tests] + [util :as util :refer [meh]]] + [jepsen.control.util :as cu] + [jepsen.os.ubuntu :as ubuntu] + [jepsen.checker.timeline :as timeline] + [clojure.java.io :as io] + [zookeeper.data :as data] + [zookeeper :as zk]) + (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException) + (ch.qos.logback.classic Level) + (org.slf4j Logger LoggerFactory))) + +(def workloads + "A map of workload names to functions that construct workloads, given opts." + {"set" set/workload + "register" register/workload + "unique-ids" unique/workload + "counter" counter/workload + "total-queue" queue/total-workload + "linear-queue" queue/linear-workload}) + +(def cli-opts + "Additional command line options." + [["-w" "--workload NAME" "What workload should we run?" + :default "set" + :validate [workloads (cli/one-of workloads)]] + [nil "--nemesis NAME" "Which nemesis will poison our lives?" + :default "random-node-killer" + :validate [custom-nemesis/custom-nemesises (cli/one-of custom-nemesis/custom-nemesises)]] + ["-q" "--quorum" "Use quorum reads, instead of reading from any primary."] + ["-r" "--rate HZ" "Approximate number of requests per second, per thread." + :default 10 + :parse-fn read-string + :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] + ["-s" "--snapshot-distance NUM" "Number of log entries to create snapshot" + :default 10000 + :parse-fn read-string + :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] + [nil "--stale-log-gap NUM" "Number of log entries to send snapshot instead of separate logs" + :default 1000 + :parse-fn read-string + :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] + [nil "--reserved-log-items NUM" "Number of log entries to keep after snapshot" + :default 1000 + :parse-fn read-string + :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] + [nil "--ops-per-key NUM" "Maximum number of operations on any given key." + :default 100 + :parse-fn parse-long + :validate [pos? 
"Must be a positive integer."]] + [nil, "--lightweight-run" "Subset of workloads/nemesises which is simple to validate"] + [nil, "--reuse-binary" "Use already downloaded binary if it exists, don't remove it on shutdown"] + ["-c" "--clickhouse-source URL" "URL for clickhouse deb or tgz package" + :default "https://clickhouse-builds.s3.yandex.net/21677/ef82333089156907a0979669d9374c2e18daabe5/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/clickhouse-common-static_21.4.1.6313_amd64.deb"]]) + +(defn nukeeper-test + "Given an options map from the command line runner (e.g. :nodes, :ssh, + :concurrency, ...), constructs a test map." + [opts] + (info "Test opts\n" (with-out-str (pprint opts))) + (let [quorum (boolean (:quorum opts)) + workload ((get workloads (:workload opts)) opts) + current-nemesis (get custom-nemesis/custom-nemesises (:nemesis opts))] + (merge tests/noop-test + opts + {:name (str "clickhouse-keeper-quorum=" quorum "-" (name (:workload opts)) "-" (name (:nemesis opts))) + :os ubuntu/os + :db (db (:clickhouse-source opts) (boolean (:reuse-binary opts))) + :pure-generators true + :client (:client workload) + :nemesis (:nemesis current-nemesis) + :checker (checker/compose + {:perf (checker/perf) + :workload (:checker workload)}) + :generator (gen/phases + (->> (:generator workload) + (gen/stagger (/ (:rate opts))) + (gen/nemesis (:generator current-nemesis)) + (gen/time-limit (:time-limit opts))) + (gen/log "Healing cluster") + (gen/nemesis (gen/once {:type :info, :f :stop})) + (gen/log "Waiting for recovery") + (gen/sleep 10) + (gen/clients (:final-generator workload)))}))) + +(def all-nemesises (keys custom-nemesis/custom-nemesises)) + +(def all-workloads (keys workloads)) + +(def lightweight-workloads ["set" "unique-ids" "counter" "total-queue"]) + +(def useful-nemesises ["random-node-killer" + "simple-partitioner" + "all-nodes-hammer-time" + ; can lead to a very rare data loss https://github.com/eBay/NuRaft/issues/185 + ;"logs-and-snapshots-corruptor" + ;"drop-data-corruptor" + "bridge-partitioner" + "blind-node-partitioner" + "blind-others-partitioner"]) + +(defn cart [colls] + (if (empty? colls) + '(()) + (for [more (cart (rest colls)) + x (first colls)] + (cons x more)))) + +(defn all-test-options + "Takes base cli options, a collection of nemeses, workloads, and a test count, + and constructs a sequence of test options." + [cli worload-nemeseis-collection] + (take (:test-count cli) + (shuffle (for [[workload nemesis] worload-nemeseis-collection] + (assoc cli + :nemesis nemesis + :workload workload + :test-count 1))))) +(defn all-tests + "Turns CLI options into a sequence of tests." + [test-fn cli] + (if (boolean (:lightweight-run cli)) + (map test-fn (all-test-options cli (cart [lightweight-workloads useful-nemesises]))) + (map test-fn (all-test-options cli (cart [all-workloads all-nemesises]))))) + +(defn -main + "Handles command line arguments. Can either run a test, or a web server for + browsing results." + [& args] + (.setLevel + (LoggerFactory/getLogger "org.apache.zookeeper") Level/OFF) + (cli/run! 
(merge (cli/single-test-cmd {:test-fn nukeeper-test + :opt-spec cli-opts}) + (cli/test-all-cmd {:tests-fn (partial all-tests nukeeper-test) + :opt-spec cli-opts}) + (cli/serve-cmd)) + args)) diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/nemesis.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/nemesis.clj new file mode 100644 index 00000000000..7d4941cdc8e --- /dev/null +++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/nemesis.clj @@ -0,0 +1,160 @@ +(ns jepsen.nukeeper.nemesis + (:require + [clojure.tools.logging :refer :all] + [jepsen + [nemesis :as nemesis] + [control :as c] + [generator :as gen]] + [jepsen.nukeeper.constants :refer :all] + [jepsen.nukeeper.utils :refer :all])) + +(defn random-node-killer-nemesis + [] + (nemesis/node-start-stopper + rand-nth + (fn start [test node] (kill-clickhouse! node test)) + (fn stop [test node] (start-clickhouse! node test)))) + +(defn all-nodes-killer-nemesis + [] + (nemesis/node-start-stopper + identity + (fn start [test node] (kill-clickhouse! node test)) + (fn stop [test node] (start-clickhouse! node test)))) + +(defn random-node-hammer-time-nemesis + [] + (nemesis/hammer-time "clickhouse")) + +(defn all-nodes-hammer-time-nemesis + [] + (nemesis/hammer-time identity "clickhouse")) + +(defn select-last-file + [path] + (last (clojure.string/split + (c/exec :find path :-type :f :-printf "%T+ %p\n" :| :grep :-v :tmp_ :| :sort :| :awk "{print $2}") + #"\n"))) + +(defn random-file-pos + [fname] + (let [fsize (Integer/parseInt (c/exec :du :-b fname :| :cut :-f1))] + (rand-int fsize))) + +(defn corrupt-file + [fname] + (if (not (empty? fname)) + (do + (info "Corrupting" fname) + (c/exec :dd "if=/dev/zero" (str "of=" fname) "bs=1" "count=1" (str "seek=" (random-file-pos fname)) "conv=notrunc")) + (info "Nothing to corrupt"))) + +(defn corruptor-nemesis + [path corruption-op] + (reify nemesis/Nemesis + + (setup! [this test] this) + + (invoke! [this test op] + (cond (= (:f op) :corrupt) + (let [nodes (list (rand-nth (:nodes test)))] + (info "Corruption on node" nodes) + (c/on-nodes test nodes + (fn [test node] + (c/su + (kill-clickhouse! node test) + (corruption-op path) + (start-clickhouse! node test)))) + (assoc op :type :info, :value :corrupted)) + :else (do (c/on-nodes test (:nodes test) + (fn [test node] + (c/su + (start-clickhouse! node test)))) + (assoc op :type :info, :value :done)))) + + (teardown! 
[this test]))) + +(defn logs-corruption-nemesis + [] + (corruptor-nemesis coordination-logs-dir #(corrupt-file (select-last-file %1)))) + +(defn snapshots-corruption-nemesis + [] + (corruptor-nemesis coordination-snapshots-dir #(corrupt-file (select-last-file %1)))) + +(defn logs-and-snapshots-corruption-nemesis + [] + (corruptor-nemesis coordination-data-dir (fn [path] + (do + (corrupt-file (select-last-file (str path "/snapshots"))) + (corrupt-file (select-last-file (str path "/logs"))))))) +(defn drop-all-corruption-nemesis + [] + (corruptor-nemesis coordination-data-dir (fn [path] + (c/exec :rm :-fr path)))) + +(defn partition-bridge-nemesis + [] + (nemesis/partitioner nemesis/bridge)) + +(defn blind-node + [nodes] + (let [[[victim] others] (nemesis/split-one nodes)] + {victim (into #{} others)})) + +(defn blind-node-partition-nemesis + [] + (nemesis/partitioner blind-node)) + +(defn blind-others + [nodes] + (let [[[victim] others] (nemesis/split-one nodes)] + (into {} (map (fn [node] [node #{victim}])) others))) + +(defn blind-others-partition-nemesis + [] + (nemesis/partitioner blind-others)) + +(defn network-non-symmetric-nemesis + [] + (nemesis/partitioner nemesis/bridge)) + +(defn start-stop-generator + [time-corrupt time-ok] + (->> + (cycle [(gen/sleep time-ok) + {:type :info, :f :start} + (gen/sleep time-corrupt) + {:type :info, :f :stop}]))) + +(defn corruption-generator + [] + (->> + (cycle [(gen/sleep 5) + {:type :info, :f :corrupt}]))) + +(def custom-nemesises + {"random-node-killer" {:nemesis (random-node-killer-nemesis) + :generator (start-stop-generator 5 5)} + "all-nodes-killer" {:nemesis (all-nodes-killer-nemesis) + :generator (start-stop-generator 1 10)} + "simple-partitioner" {:nemesis (nemesis/partition-random-halves) + :generator (start-stop-generator 5 5)} + "random-node-hammer-time" {:nemesis (random-node-hammer-time-nemesis) + :generator (start-stop-generator 5 5)} + "all-nodes-hammer-time" {:nemesis (all-nodes-hammer-time-nemesis) + :generator (start-stop-generator 1 10)} + "logs-corruptor" {:nemesis (logs-corruption-nemesis) + :generator (corruption-generator)} + "snapshots-corruptor" {:nemesis (snapshots-corruption-nemesis) + :generator (corruption-generator)} + "logs-and-snapshots-corruptor" {:nemesis (logs-and-snapshots-corruption-nemesis) + :generator (corruption-generator)} + "drop-data-corruptor" {:nemesis (drop-all-corruption-nemesis) + :generator (corruption-generator)} + "bridge-partitioner" {:nemesis (partition-bridge-nemesis) + :generator (start-stop-generator 5 5)} + "blind-node-partitioner" {:nemesis (blind-node-partition-nemesis) + :generator (start-stop-generator 5 5)} + "blind-others-partitioner" {:nemesis (blind-others-partition-nemesis) + :generator (start-stop-generator 5 5)}}) diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/queue.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/queue.clj new file mode 100644 index 00000000000..308778983aa --- /dev/null +++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/queue.clj @@ -0,0 +1,79 @@ +(ns jepsen.nukeeper.queue + (:require + [clojure.tools.logging :refer :all] + [jepsen + [checker :as checker] + [client :as client] + [generator :as gen]] + [knossos.model :as model] + [jepsen.checker.timeline :as timeline] + [jepsen.nukeeper.utils :refer :all] + [zookeeper :as zk]) + (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException))) + +(defn enqueue [val _ _] {:type :invoke, :f :enqueue :value val}) +(defn dequeue [_ _] {:type :invoke, :f :dequeue}) + +(defrecord 
QueueClient [conn nodename]
+  client/Client
+  (open! [this test node]
+    (assoc
+     (assoc this
+            :conn (zk-connect node 9181 30000))
+     :nodename node))
+
+  (setup! [this test])
+
+  (invoke! [this test op]
+    (case (:f op)
+      :enqueue (try
+                 (do
+                   (zk-create-if-not-exists conn (str "/" (:value op)) "")
+                   (assoc op :type :ok))
+                 (catch Exception _ (assoc op :type :info, :error :connect-error)))
+      :dequeue
+      (try
+        (let [result (zk-multi-delete-first-child conn "/")]
+          (if (not (nil? result))
+            (assoc op :type :ok :value result)
+            (assoc op :type :fail :value result)))
+        (catch Exception _ (assoc op :type :info, :error :connect-error)))
+      :drain
+      ; draining via delete takes too long, just list all nodes
+      (exec-with-retries 30 (fn []
+                              (zk-sync conn)
+                              (assoc op :type :ok :value (into #{} (map #(str %1) (zk-list conn "/"))))))))
+
+  (teardown! [_ test])
+
+  (close! [_ test]
+    (zk/close conn)))
+
+(defn sorted-str-range
+  [n]
+  (sort (map (fn [v] (str v)) (take n (range)))))
+
+(defn total-workload
+  "A generator, client, and checker for a total-queue test."
+  [opts]
+  {:client (QueueClient. nil nil)
+   :checker (checker/compose
+             {:total-queue (checker/total-queue)
+              :timeline (timeline/html)})
+   :generator (->> (sorted-str-range 50000)
+                   (map (fn [x]
+                          (rand-nth [{:type :invoke, :f :enqueue :value x}
+                                     {:type :invoke, :f :dequeue}]))))
+   :final-generator (gen/once {:type :invoke, :f :drain, :value nil})})
+
+(defn linear-workload
+  "A generator, client, and checker for a linearizable-queue test."
+  [opts]
+  {:client (QueueClient. nil nil)
+   :checker (checker/compose
+             {:linear (checker/linearizable {:model (model/unordered-queue)
+                                             :algorithm :linear})
+              :timeline (timeline/html)})
+   :generator (->> (sorted-str-range 10000)
+                   (map (fn [x]
+                          (rand-nth [{:type :invoke, :f :enqueue :value x}
+                                     {:type :invoke, :f :dequeue}]))))})
diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/register.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/register.clj
new file mode 100644
index 00000000000..98322845346
--- /dev/null
+++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/register.clj
@@ -0,0 +1,64 @@
+(ns jepsen.nukeeper.register
+  (:require [jepsen
+             [checker :as checker]
+             [client :as client]
+             [independent :as independent]
+             [generator :as gen]]
+            [jepsen.checker.timeline :as timeline]
+            [knossos.model :as model]
+            [jepsen.nukeeper.utils :refer :all]
+            [zookeeper :as zk])
+  (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
+
+(defn r [_ _] {:type :invoke, :f :read, :value nil})
+(defn w [_ _] {:type :invoke, :f :write, :value (rand-int 5)})
+(defn cas [_ _] {:type :invoke, :f :cas, :value [(rand-int 5) (rand-int 5)]})
+
+(defrecord RegisterClient [conn]
+  client/Client
+  (open! [this test node]
+    (assoc this :conn (zk-connect node 9181 30000)))
+
+  (setup! [this test]
+    (zk-create-range conn 300)) ; 300 nodes to be sure
+
+  (invoke! [_ test op]
+    (let [[k v] (:value op)
+          zk-k (zk-path k)]
+      (case (:f op)
+        :read (try
+                (assoc op :type :ok, :value (independent/tuple k (parse-long (:data (zk-get-str conn zk-k)))))
+                (catch Exception _ (assoc op :type :fail, :error :connect-error)))
+        :write (try
+                 (do (zk-set conn zk-k v)
+                     (assoc op :type :ok))
+                 (catch Exception _ (assoc op :type :info, :error :connect-error)))
+        :cas (try
+               (let [[old new] v]
+                 (assoc op :type (if (zk-cas conn zk-k old new)
+                                   :ok
+                                   :fail)))
+               (catch KeeperException$BadVersionException _ (assoc op :type :fail, :error :bad-version))
+               (catch Exception _ (assoc op :type :info, :error :connect-error))))))
+
+  (teardown! [this test])
+
+  (close! [_ test]
+    (zk/close conn)))
+
+(defn workload
+  "Tests linearizable reads, writes, and compare-and-set operations on
+  independent keys."
+  [opts]
+  {:client (RegisterClient. nil)
+   :checker (independent/checker
+             (checker/compose
+              {:linear (checker/linearizable {:model (model/cas-register)
+                                              :algorithm :linear})
+               :timeline (timeline/html)}))
+   :generator (independent/concurrent-generator
+               10
+               (range)
+               (fn [k]
+                 (->> (gen/mix [r w cas])
+                      (gen/limit (:ops-per-key opts)))))})
diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/set.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/set.clj
new file mode 100644
index 00000000000..f9d21a8dc62
--- /dev/null
+++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/set.clj
@@ -0,0 +1,49 @@
+(ns jepsen.nukeeper.set
+  (:require
+   [clojure.tools.logging :refer :all]
+   [jepsen
+    [checker :as checker]
+    [client :as client]
+    [generator :as gen]]
+   [jepsen.nukeeper.utils :refer :all]
+   [zookeeper :as zk])
+  (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
+
+(defrecord SetClient [k conn nodename]
+  client/Client
+  (open! [this test node]
+    (assoc
+     (assoc this
+            :conn (zk-connect node 9181 30000))
+     :nodename node))
+
+  (setup! [this test]
+    (zk-create-if-not-exists conn k "#{}"))
+
+  (invoke! [this test op]
+    (case (:f op)
+      :read (exec-with-retries 30 (fn []
+                                    (zk-sync conn)
+                                    (assoc op
+                                           :type :ok
+                                           :value (read-string (:data (zk-get-str conn k))))))
+      :add (try
+             (do
+               (zk-add-to-set conn k (:value op))
+               (assoc op :type :ok))
+             (catch KeeperException$BadVersionException _ (assoc op :type :fail, :error :bad-version))
+             (catch Exception _ (assoc op :type :info, :error :connect-error)))))
+
+  (teardown! [_ test])
+
+  (close! [_ test]
+    (zk/close conn)))
+
+(defn workload
+  "A generator, client, and checker for a set test."
+  [opts]
+  {:client (SetClient. "/a-set" nil nil)
+   :checker (checker/set)
+   :generator (->> (range)
+                   (map (fn [x] {:type :invoke, :f :add, :value x})))
+   :final-generator (gen/once {:type :invoke, :f :read, :value nil})})
diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/unique.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/unique.clj
new file mode 100644
index 00000000000..9dfb906bc17
--- /dev/null
+++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/unique.clj
@@ -0,0 +1,42 @@
+(ns jepsen.nukeeper.unique
+  (:require
+   [clojure.tools.logging :refer :all]
+   [jepsen
+    [checker :as checker]
+    [client :as client]
+    [generator :as gen]]
+   [jepsen.nukeeper.utils :refer :all]
+   [zookeeper :as zk])
+  (:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
+
+(defrecord UniqueClient [conn nodename]
+  client/Client
+  (open! [this test node]
+    (assoc
+     (assoc this
+            :conn (zk-connect node 9181 30000))
+     :nodename node))
+
+  (setup! [this test])
+
+  (invoke! [this test op]
+    (case (:f op)
+      :generate
+      (try
+        (let [result-path (zk-create-sequential conn "/seq-" "")]
+          (assoc op :type :ok :value (parse-and-get-counter result-path)))
+        (catch Exception _ (assoc op :type :info, :error :connect-error)))))
+
+  (teardown! [_ test])
+
+  (close! [_ test]
+    (zk/close conn)))
+
+(defn workload
+  "A generator, client, and checker for a unique-IDs test."
+  [opts]
+  {:client (UniqueClient. nil nil)
+   :checker (checker/unique-ids)
+   :generator (->>
+               (range)
+               (map (fn [_] {:type :invoke, :f :generate})))})
diff --git a/tests/jepsen.nukeeper/src/jepsen/nukeeper/utils.clj b/tests/jepsen.nukeeper/src/jepsen/nukeeper/utils.clj
new file mode 100644
index 00000000000..cfe9add238b
--- /dev/null
+++ b/tests/jepsen.nukeeper/src/jepsen/nukeeper/utils.clj
@@ -0,0 +1,180 @@
+(ns jepsen.nukeeper.utils
+  (:require [clojure.string :as str]
+            [zookeeper.data :as data]
+            [zookeeper :as zk]
+            [zookeeper.internal :as zi]
+            [jepsen.control.util :as cu]
+            [jepsen.nukeeper.constants :refer :all]
+            [jepsen.control :as c]
+            [clojure.tools.logging :refer :all])
+  (:import (org.apache.zookeeper.data Stat)
+           (org.apache.zookeeper CreateMode
+                                 ZooKeeper)
+           (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
+
+(defn parse-long
+  "Parses a string to a Long. Passes through `nil` and empty strings."
+  [s]
+  (if (and s (> (count s) 0))
+    (Long/parseLong s)))
+
+(defn parse-and-get-counter
+  [path]
+  (Integer/parseInt (apply str (take-last 10 (seq (str path))))))
+
+(defn zk-range
+  []
+  (map (fn [v] (str "/" v)) (range)))
+
+(defn zk-path
+  [n]
+  (str "/" n))
+
+(defn zk-connect
+  [host port timeout]
+  (zk/connect (str host ":" port) :timeout-msec timeout))
+
+(defn zk-create-range
+  [conn n]
+  (dorun (map (fn [v] (zk/create-all conn v :persistent? true)) (take n (zk-range)))))
+
+(defn zk-set
+  ([conn path value]
+   (zk/set-data conn path (data/to-bytes (str value)) -1))
+  ([conn path value version]
+   (zk/set-data conn path (data/to-bytes (str value)) version)))
+
+(defn zk-get-str
+  [conn path]
+  (let [zk-result (zk/data conn path)]
+    {:data (data/to-string (:data zk-result))
+     :stat (:stat zk-result)}))
+
+(defn zk-list
+  [conn path]
+  (zk/children conn path))
+
+(defn zk-list-with-stat
+  [conn path]
+  (let [stat (new Stat)
+        children (seq (.getChildren conn path false stat))]
+    {:children children
+     :stat (zi/stat-to-map stat)}))
+
+(defn zk-cas
+  [conn path old-value new-value]
+  (let [current-value (zk-get-str conn path)]
+    (if (= (parse-long (:data current-value)) old-value)
+      (do (zk-set conn path new-value (:version (:stat current-value)))
+          true))))
+
+(defn zk-add-to-set
+  [conn path elem]
+  (let [current-value (zk-get-str conn path)
+        current-set (read-string (:data current-value))
+        new-set (conj current-set elem)]
+    (zk-set conn path (pr-str new-set) (:version (:stat current-value)))))
+
+(defn zk-create-if-not-exists
+  [conn path data]
+  (zk/create conn path :data (data/to-bytes (str data)) :persistent? true))
+
+(defn zk-create-sequential
+  [conn path-prefix data]
+  (zk/create conn path-prefix :data (data/to-bytes (str data)) :persistent? true :sequential? true))
+
+(defn zk-multi-create-many-seq-nodes
+  [conn path-prefix num]
+  (let [txn (.transaction conn)]
+    (loop [i 0]
+      (cond (>= i num) (.commit txn)
+            :else (do (.create txn path-prefix
+                               (data/to-bytes "")
+                               (zi/acls :open-acl-unsafe)
+                               CreateMode/PERSISTENT_SEQUENTIAL)
+                      (recur (inc i)))))))
+
+; A sync call is not implemented in zookeeper-clj, and the Java API has no
+; synchronous version of it, so emulate sync with a dummy write to the root node.
+(defn zk-sync
+  [conn]
+  (zk-set conn "/" "" -1))
+
+(defn zk-parent-path
+  [path]
+  (let [rslash-pos (str/last-index-of path "/")]
+    (if (> rslash-pos 0)
+      (subs path 0 rslash-pos)
+      "/")))
+
+(defn zk-multi-delete-first-child
+  [conn path]
+  (let [{children :children stat :stat} (zk-list-with-stat conn path)
+        txn (.transaction conn)
+        first-child (first (sort children))]
+    (if (not (nil? first-child))
+      (try
+        (do (.check txn path (:version stat))
+            (.setData txn path (data/to-bytes "") -1) ; just to exercise multi-transactions
+            (.delete txn (str path first-child) -1)
+            (.commit txn)
+            first-child)
+        (catch KeeperException$BadVersionException _ nil)
+        ; Even if we got a connection loss, the delete may actually have been
+        ; executed. This function is used for the queue model, which strictly
+        ; requires all enqueued elements to be dequeued but allows duplicates.
+        ; So even when we are not sure the delete happened, we return first-child.
+        (catch Exception _ first-child))
+      nil)))
+
+(defn clickhouse-alive?
+  [node test]
+  (info "Checking server alive on" node)
+  (try
+    (c/exec binary-path :client :--query "SELECT 1")
+    (catch Exception _ false)))
+
+(defn wait-clickhouse-alive!
+  [node test & {:keys [maxtries] :or {maxtries 30}}]
+  (loop [i 0]
+    (cond (> i maxtries) false
+          (clickhouse-alive? node test) true
+          :else (do (Thread/sleep 1000) (recur (inc i))))))
+
+(defn kill-clickhouse!
+  [node test]
+  (info "Killing server on node" node)
+  (c/su
+   (cu/stop-daemon! binary-path pid-file-path)
+   (c/exec :rm :-fr (str data-dir "/status"))))
+
+(defn start-clickhouse!
+  [node test]
+  (info "Starting server on node" node)
+  (c/su
+   (cu/start-daemon!
+    {:pidfile pid-file-path
+     :logfile stderr-file
+     :chdir data-dir}
+    binary-path
+    :server
+    :--config (str configs-dir "/config.xml")
+    :--
+    :--path (str data-dir "/")
+    :--user_files_path (str data-dir "/user_files")
+    :--top_level_domains_path (str data-dir "/top_level_domains")
+    :--logger.log (str logs-dir "/clickhouse-server.log")
+    :--logger.errorlog (str logs-dir "/clickhouse-server.err.log")
+    :--test_keeper_server.snapshot_storage_path coordination-snapshots-dir
+    :--test_keeper_server.logs_storage_path coordination-logs-dir)
+   (wait-clickhouse-alive! node test)))
+
+(defn exec-with-retries
+  [retries f & args]
+  (let [res (try {:value (apply f args)}
+                 (catch Exception e
+                   (if (zero? retries)
+                     (throw e)
+                     {:exception e})))]
+    (if (:exception res)
+      (do (Thread/sleep 1000) (recur (dec retries) f args))
+      (:value res))))
diff --git a/tests/jepsen.nukeeper/test/jepsen/nukeeper_test.clj b/tests/jepsen.nukeeper/test/jepsen/nukeeper_test.clj
new file mode 100644
index 00000000000..db84ff33ee3
--- /dev/null
+++ b/tests/jepsen.nukeeper/test/jepsen/nukeeper_test.clj
@@ -0,0 +1,39 @@
+(ns jepsen.nukeeper-test
+  (:require [clojure.test :refer :all]
+            [jepsen.nukeeper.utils :refer :all]
+            [zookeeper :as zk]
+            [zookeeper.data :as data])
+  (:import (ch.qos.logback.classic Level)
+           (org.slf4j Logger LoggerFactory)))
+
+(defn multicreate
+  [conn]
+  (dorun (map (fn [v] (zk/create conn v :persistent?
true)) (take 10 (zk-range))))) + +(defn multidelete + [conn] + (dorun (map (fn [v] (zk/delete conn v)) (take 10 (zk-range))))) + +(deftest a-test + (testing "nukeeper connection" + (.setLevel + (LoggerFactory/getLogger "org.apache.zookeeper") Level/OFF) + (let [conn (zk/connect "localhost:9181" :timeout-msec 5000)] + ;(println (take 10 (zk-range))) + ;(multidelete conn) + ;(multicreate conn) + ;(zk/create-all conn "/0") + ;(zk/create conn "/0") + ;(println (zk/children conn "/")) + ;(zk/set-data conn "/0" (data/to-bytes "777") -1) + (println (zk-parent-path "/sasds/dasda/das")) + (println (zk-parent-path "/sasds")) + (zk-multi-create-many-seq-nodes conn "/a-" 5) + (println (zk/children conn "/")) + (println (zk-list-with-stat conn "/")) + (println (zk-multi-delete-first-child conn "/")) + (println (zk-list-with-stat conn "/")) + ;(Thread/sleep 5000) + ;(println "VALUE" (data/to-string (:data (zk/data conn "/0")))) + ;(is (= (data/to-string (:data (zk/data conn "/0"))) "777")) + (zk/close conn)))) diff --git a/tests/performance/arithmetic.xml b/tests/performance/arithmetic.xml index 0be61eb5823..bf5e7662e37 100644 --- a/tests/performance/arithmetic.xml +++ b/tests/performance/arithmetic.xml @@ -1,4 +1,4 @@ - + 30000000000 diff --git a/tests/performance/array_join.xml b/tests/performance/array_join.xml index ca280ce28ad..cf92b51f545 100644 --- a/tests/performance/array_join.xml +++ b/tests/performance/array_join.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/performance/bounding_ratio.xml b/tests/performance/bounding_ratio.xml index e3a15f90013..e430136b624 100644 --- a/tests/performance/bounding_ratio.xml +++ b/tests/performance/bounding_ratio.xml @@ -1,4 +1,4 @@ - + SELECT boundingRatio(number, number) FROM numbers(100000000) SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000) diff --git a/tests/performance/codec_none.xml b/tests/performance/codec_none.xml new file mode 100644 index 00000000000..e6eb9773a66 --- /dev/null +++ b/tests/performance/codec_none.xml @@ -0,0 +1,13 @@ + + + hits_10m_single + + + CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() + INSERT INTO hits_none SELECT Title FROM test.hits + OPTIMIZE TABLE hits_none FINAL + + + + DROP TABLE hits_none + diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index a7cb5152c09..b282bcc268f 100644 --- a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -1,5 +1,5 @@ - + 1 diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index caefaba3725..662df80ae70 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/collations.xml b/tests/performance/collations.xml index 17b2d36b7e3..52ccede3798 100644 --- a/tests/performance/collations.xml +++ b/tests/performance/collations.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/performance/conditional.xml b/tests/performance/conditional.xml index 21623f45b05..91b6cb95ff2 100644 --- a/tests/performance/conditional.xml +++ b/tests/performance/conditional.xml @@ -1,4 +1,4 @@ - + SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) SELECT count() FROM zeros(10000000) WHERE NOT 
ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) diff --git a/tests/performance/constant_column_search.xml b/tests/performance/constant_column_search.xml index cb76fd4cefb..71d8185d818 100644 --- a/tests/performance/constant_column_search.xml +++ b/tests/performance/constant_column_search.xml @@ -1,4 +1,4 @@ - + search diff --git a/tests/performance/date_time_64.xml b/tests/performance/date_time_64.xml index 838aba34d87..fd883416a33 100644 --- a/tests/performance/date_time_64.xml +++ b/tests/performance/date_time_64.xml @@ -1,4 +1,4 @@ - + hits_100m_single diff --git a/tests/performance/date_time_long.xml b/tests/performance/date_time_long.xml index 0c3d85f9659..c2eb42d3318 100644 --- a/tests/performance/date_time_long.xml +++ b/tests/performance/date_time_long.xml @@ -1,4 +1,4 @@ - + datetime_transform diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml index eb1b4e0da00..68b52d917dd 100644 --- a/tests/performance/direct_dictionary.xml +++ b/tests/performance/direct_dictionary.xml @@ -1,4 +1,4 @@ - + CREATE TABLE simple_direct_dictionary_test_table ( diff --git a/tests/performance/float_formatting.xml b/tests/performance/float_formatting.xml index d24ccd7664c..71d8aee3f89 100644 --- a/tests/performance/float_formatting.xml +++ b/tests/performance/float_formatting.xml @@ -3,7 +3,7 @@ is 10 times faster than toString(number % 100 + 0.5). The shorter queries are somewhat unstable, so ignore differences less than 10%. --> - + expr diff --git a/tests/performance/float_parsing.xml b/tests/performance/float_parsing.xml index 33ab8ba6f10..eb8577bd127 100644 --- a/tests/performance/float_parsing.xml +++ b/tests/performance/float_parsing.xml @@ -1,4 +1,4 @@ - + expr diff --git a/tests/performance/fuzz_bits.xml b/tests/performance/fuzz_bits.xml index 2679977cb1d..87064e520c2 100644 --- a/tests/performance/fuzz_bits.xml +++ b/tests/performance/fuzz_bits.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml index bd2fa9674f6..f34554360cf 100644 --- a/tests/performance/general_purpose_hashes.xml +++ b/tests/performance/general_purpose_hashes.xml @@ -1,4 +1,4 @@ - + gp_hash_func diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index bc49a7de1bd..0339a8c19e8 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -1,4 +1,4 @@ - + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 1000000000); diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml index c49712a8519..aebc305335c 100644 --- a/tests/performance/group_by_sundy_li.xml +++ b/tests/performance/group_by_sundy_li.xml @@ -1,4 +1,4 @@ - + 8 diff --git a/tests/performance/if_array_string.xml b/tests/performance/if_array_string.xml index 445b3c8c55a..773509e1c4b 100644 --- a/tests/performance/if_array_string.xml +++ b/tests/performance/if_array_string.xml @@ -1,4 +1,4 @@ - + SELECT count() FROM zeros(10000000) WHERE NOT 
ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])
     SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])
     SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))
diff --git a/tests/performance/int_parsing.xml b/tests/performance/int_parsing.xml
index 3b8620e46c3..32f904331ce 100644
--- a/tests/performance/int_parsing.xml
+++ b/tests/performance/int_parsing.xml
@@ -1,4 +1,4 @@
-
+
     hits_100m_single
     hits_10m_single
diff --git a/tests/performance/jit_small_requests.xml b/tests/performance/jit_small_requests.xml
index c9abec0926b..d8f917fb9af 100644
--- a/tests/performance/jit_small_requests.xml
+++ b/tests/performance/jit_small_requests.xml
@@ -1,4 +1,4 @@
-
+
     WITH bitXor(number, 0x4CF2D2BAAE6DA887) AS x0,
diff --git a/tests/performance/joins_in_memory.xml b/tests/performance/joins_in_memory.xml
index bac7679930f..fac6f2659c6 100644
--- a/tests/performance/joins_in_memory.xml
+++ b/tests/performance/joins_in_memory.xml
@@ -1,4 +1,4 @@
-
+
     CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory
     INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)
diff --git a/tests/performance/joins_in_memory_pmj.xml b/tests/performance/joins_in_memory_pmj.xml
index 5dd4395513d..87d1c0df14c 100644
--- a/tests/performance/joins_in_memory_pmj.xml
+++ b/tests/performance/joins_in_memory_pmj.xml
@@ -1,4 +1,4 @@
-
+
     CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory
diff --git a/tests/performance/logical_functions_medium.xml b/tests/performance/logical_functions_medium.xml
index be474894b54..19572191532 100644
--- a/tests/performance/logical_functions_medium.xml
+++ b/tests/performance/logical_functions_medium.xml
@@ -1,4 +1,4 @@
-
+
     1
diff --git a/tests/performance/logical_functions_small.xml b/tests/performance/logical_functions_small.xml
index 3d70ef6811d..d5f6a7b99cb 100644
--- a/tests/performance/logical_functions_small.xml
+++ b/tests/performance/logical_functions_small.xml
@@ -1,4 +1,4 @@
-
+
     1
diff --git a/tests/performance/math.xml b/tests/performance/math.xml
index 006e33548c9..35250351683 100644
--- a/tests/performance/math.xml
+++ b/tests/performance/math.xml
@@ -1,4 +1,4 @@
-
+
     func_slow
diff --git a/tests/performance/optimized_select_final.xml b/tests/performance/optimized_select_final.xml
index 2c8254d2b88..d70fccc1330 100644
--- a/tests/performance/optimized_select_final.xml
+++ b/tests/performance/optimized_select_final.xml
@@ -1,4 +1,4 @@
-
+
     1
diff --git a/tests/performance/optimized_select_final_one_part.xml b/tests/performance/optimized_select_final_one_part.xml
index 92c8eed859a..63541313ac9 100644
--- a/tests/performance/optimized_select_final_one_part.xml
+++ b/tests/performance/optimized_select_final_one_part.xml
@@ -1,4 +1,4 @@
-
+
     1
diff --git a/tests/performance/or_null_default.xml b/tests/performance/or_null_default.xml
index 6fed0cce4d6..009719f66a5 100644
--- a/tests/performance/or_null_default.xml
+++ b/tests/performance/or_null_default.xml
@@ -1,4 +1,4 @@
-
+
     SELECT sumOrNull(number) FROM numbers(100000000)
     SELECT sumOrDefault(toNullable(number)) FROM numbers(100000000)
     SELECT sumOrNull(number) FROM numbers(10000000) GROUP BY number % 1024
diff --git a/tests/performance/parse_engine_file.xml b/tests/performance/parse_engine_file.xml
index 2459ed084cd..d49670b36b5 100644
--- a/tests/performance/parse_engine_file.xml
+++ b/tests/performance/parse_engine_file.xml
@@ -1,4 +1,4 @@
-
+
     test.hits
diff --git a/tests/performance/random_string.xml b/tests/performance/random_string.xml
index 1a740ae077a..79f12373f1c 100644
--- a/tests/performance/random_string.xml
+++ b/tests/performance/random_string.xml
@@ -1,4 +1,4 @@
-
+
     SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(10))
     SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(100))
     SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(1000))
diff --git a/tests/performance/sum.xml b/tests/performance/sum.xml
index 32c194dab6f..9bee2a580c3 100644
--- a/tests/performance/sum.xml
+++ b/tests/performance/sum.xml
@@ -1,4 +1,4 @@
-
+
     SELECT sum(number) FROM numbers(100000000)
     SELECT sum(toUInt32(number)) FROM numbers(100000000)
     SELECT sum(toUInt16(number)) FROM numbers(100000000)
diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml
index bc9f9be2a18..b732c150220 100644
--- a/tests/performance/sum_map.xml
+++ b/tests/performance/sum_map.xml
@@ -1,4 +1,4 @@
-
+
     1
diff --git a/tests/performance/synthetic_hardware_benchmark.xml b/tests/performance/synthetic_hardware_benchmark.xml
index 4b94f73a21d..ffcf30db5cb 100644
--- a/tests/performance/synthetic_hardware_benchmark.xml
+++ b/tests/performance/synthetic_hardware_benchmark.xml
@@ -1,4 +1,4 @@
-
+
     30000000000
diff --git a/tests/performance/url_hits.xml b/tests/performance/url_hits.xml
index a699ef6ba97..1813b2a72cb 100644
--- a/tests/performance/url_hits.xml
+++ b/tests/performance/url_hits.xml
@@ -1,4 +1,4 @@
-
+
     hits_100m_single
     hits_10m_single
diff --git a/tests/performance/visit_param_extract_raw.xml b/tests/performance/visit_param_extract_raw.xml
index 67faeb1f743..358dcc9cc0e 100644
--- a/tests/performance/visit_param_extract_raw.xml
+++ b/tests/performance/visit_param_extract_raw.xml
@@ -1,4 +1,4 @@
-
+
     param
diff --git a/tests/performance/window_functions.xml b/tests/performance/window_functions.xml
index 622e349d060..6be3d59e2b0 100644
--- a/tests/performance/window_functions.xml
+++ b/tests/performance/window_functions.xml
@@ -110,4 +110,46 @@ format Null
+
+
+    select leadInFrame(number) over w
+    from
+        (select number, intDiv(number, 1111) p, mod(number, 111) o
+        from numbers(10000000)) t
+    window w as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+    format Null
+
+
+
+
+    select any(number) over w
+    from
+        (select number, intDiv(number, 1111) p, mod(number, 111) o
+        from numbers(10000000)) t
+    window w as (partition by p order by o
+        rows between 1 following and 1 following)
+    format Null
+
+
+
+    select leadInFrame(number, number) over w
+    from
+        (select number, intDiv(number, 1111) p, mod(number, 111) o
+        from numbers(10000000)) t
+    window w as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+    format Null
+
+
+
+    select leadInFrame(number, number, number) over w
+    from
+        (select number, intDiv(number, 1111) p, mod(number, 111) o
+        from numbers(10000000)) t
+    window w as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+    format Null
+
diff --git a/tests/queries/0_stateless/00966_invalid_json_must_not_parse.reference b/tests/queries/0_stateless/00966_invalid_json_must_not_parse.reference
index f7eb44d66e0..4521d575ff3 100644
--- a/tests/queries/0_stateless/00966_invalid_json_must_not_parse.reference
+++ b/tests/queries/0_stateless/00966_invalid_json_must_not_parse.reference
@@ -4,3 +4,7 @@
 0
 0
 0
+0
+0
+0
+0
diff --git a/tests/queries/0_stateless/00966_invalid_json_must_not_parse.sql b/tests/queries/0_stateless/00966_invalid_json_must_not_parse.sql
index afcbc78cfd5..0e7fa55dbae 100644
--- a/tests/queries/0_stateless/00966_invalid_json_must_not_parse.sql
+++ b/tests/queries/0_stateless/00966_invalid_json_must_not_parse.sql
@@ -3,6 +3,8 @@ SET allow_simdjson=1;
 SELECT JSONLength('"HX-=');
 SELECT JSONLength('[9]\0\x42\xD3\x36\xE3');
 SELECT JSONLength(unhex('5B30000E06D7AA5D'));
+SELECT JSONLength('{"success"test:"123"}');
+SELECT isValidJSON('{"success"test:"123"}');
 
 SET allow_simdjson=0;
 
@@ -10,3 +12,5 @@ SET allow_simdjson=0;
 SELECT JSONLength('"HX-=');
 SELECT JSONLength('[9]\0\x42\xD3\x36\xE3');
 SELECT JSONLength(unhex('5B30000E06D7AA5D'));
+SELECT JSONLength('{"success"test:"123"}');
+SELECT isValidJSON('{"success"test:"123"}');
diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
index 1e61c8d64f3..fe6246e02f6 100755
--- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
+++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
@@ -74,7 +74,7 @@ timeout $TIMEOUT bash -c thread5 2> /dev/null &

 wait

-$CLICKHOUSE_CLIENT -n -q "
-    DROP TABLE alter_table;
-    DROP TABLE alter_table2
-"
+$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table;" &
+$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table2;" &
+
+wait
diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sql b/tests/queries/0_stateless/01053_ssd_dictionary.sql
index a23ae7e5e96..23a369cc8a6 100644
--- a/tests/queries/0_stateless/01053_ssd_dictionary.sql
+++ b/tests/queries/0_stateless/01053_ssd_dictionary.sql
@@ -76,7 +76,7 @@ CREATE DICTIONARY 01053_db.ssd_dict
 PRIMARY KEY id
 SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB '01053_db'))
 LIFETIME(MIN 1000 MAX 2000)
-LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000));
+LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096));

 SELECT 'UPDATE DICTIONARY';
 -- 118
@@ -142,7 +142,7 @@ CREATE DICTIONARY 01053_db.ssd_dict
 PRIMARY KEY id
 SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB '01053_db'))
 LIFETIME(MIN 1000 MAX 2000)
-LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10));
+LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024));

 SELECT 'UPDATE DICTIONARY (MT)';
 -- 118
diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql
index 50b34c4b18f..cd3e52c9691 100644
--- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql
+++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql
@@ -98,7 +98,7 @@ CREATE DICTIONARY 01280_db.ssd_dict
 PRIMARY KEY k1, k2
 SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB '01280_db'))
 LIFETIME(MIN 1000 MAX 2000)
-LAYOUT(COMPLEX_KEY_SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000));
+LAYOUT(COMPLEX_KEY_SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096));

 SELECT 'UPDATE DICTIONARY';
 -- 118
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index e31f8476326..14e5889a811 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -1002,6 +1002,32 @@ from numbers(5);
 1 3
 2 4
 3 \N
+-- variants of lag/lead that respect the frame
+select number, p, pp,
+    lagInFrame(number, number - pp, number * 11) over w as lag,
+    leadInFrame(number, number - pp, number * 11) over w as lead
+from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
+window w as (partition by p order by number
+    rows between unbounded preceding and unbounded following)
+order by number
+settings max_block_size = 3;
+;
+0 0 0 0 0
+1 0 0 0 2
+2 0 0 0 4
+3 0 0 0 33
+4 0 0 0 44
+5 1 5 5 5
+6 1 5 5 7
+7 1 5 5 9
+8 1 5 5 88
+9 1 5 5 99
+10 2 10 10 10
+11 2 10 10 12
+12 2 10 10 14
+13 2 10 10 143
+14 2 10 10 154
+15 3 15 15 15
 -- case-insensitive SQL-standard synonyms for any and anyLast
 select
     number,
@@ -1026,3 +1052,16 @@ order by number
 select count() over () from numbers(4) where number < 2;
 2
 2
+-- floating point RANGE frame
+select
+    count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
+    count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
+from numbers(7)
+;
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+6 6
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 7a2d2522038..30847e09246 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -347,6 +347,17 @@ select
 over (order by number rows between 1 following and 1 following)
 from numbers(5);

+-- variants of lag/lead that respect the frame
+select number, p, pp,
+    lagInFrame(number, number - pp, number * 11) over w as lag,
+    leadInFrame(number, number - pp, number * 11) over w as lead
+from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
+window w as (partition by p order by number
+    rows between unbounded preceding and unbounded following)
+order by number
+settings max_block_size = 3;
+;
+
 -- case-insensitive SQL-standard synonyms for any and anyLast
 select
     number,
@@ -360,3 +371,10 @@ order by number
 -- In this case, we had a problem with PartialSortingTransform returning zero-row
 -- chunks for input chunks w/o columns.
 select count() over () from numbers(4) where number < 2;
+
+-- floating point RANGE frame
+select
+    count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
+    count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
+from numbers(7)
+;
diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference
index 98b99778396..e056505f273 100644
--- a/tests/queries/0_stateless/01601_custom_tld.reference
+++ b/tests/queries/0_stateless/01601_custom_tld.reference
@@ -1,11 +1,24 @@
-no-tld
+-- no-tld
+
+foo.there-is-no-such-domain
+foo.there-is-no-such-domain
 foo.there-is-no-such-domain
 foo.there-is-no-such-domain
 foo
-generic
+-- generic
 kernel
 kernel.biz.ss
-difference
+-- difference
 biz.ss
 kernel.biz.ss
+-- 3+level
+xx.blogspot.co.at
+blogspot
+xx.blogspot.co.at
+blogspot
+-- url
+foobar.com
+foobar.com
+foobar.com
+xx.blogspot.co.at
diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql
index 6d68299c07d..688dd419858 100644
--- a/tests/queries/0_stateless/01601_custom_tld.sql
+++ b/tests/queries/0_stateless/01601_custom_tld.sql
@@ -1,16 +1,31 @@
-select 'no-tld';
-select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain', 'public_suffix_list');
+select '-- no-tld';
 -- even if there is no TLD, 2-nd level by default anyway
 -- FIXME: make this behavior optional (so that TLD for host never changed, either empty or something real)
+select cutToFirstSignificantSubdomain('there-is-no-such-domain');
+select cutToFirstSignificantSubdomain('foo.there-is-no-such-domain');
+select cutToFirstSignificantSubdomain('bar.foo.there-is-no-such-domain');
+select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain', 'public_suffix_list');
 select cutToFirstSignificantSubdomainCustom('foo.there-is-no-such-domain', 'public_suffix_list');
 select cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
 select firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');

-select 'generic';
-select firstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss
+select '-- generic';
+select firstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel
 select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss

-select 'difference';
+select '-- difference';
 -- biz.ss is not in the default TLD list, hence:
 select cutToFirstSignificantSubdomain('foo.kernel.biz.ss'); -- biz.ss
 select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss
+
+select '-- 3+level';
+select cutToFirstSignificantSubdomainCustom('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at
+select firstSignificantSubdomainCustom('xx.blogspot.co.at', 'public_suffix_list'); -- blogspot
+select cutToFirstSignificantSubdomainCustom('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at
+select firstSignificantSubdomainCustom('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- blogspot
+
+select '-- url';
+select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_list');
+select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list');
+select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list');
+select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list');
diff --git a/tests/queries/0_stateless/01649_with_alias_key_condition.sql b/tests/queries/0_stateless/01649_with_alias_key_condition.sql
index b813e6ee84f..0a796f8512e 100644
--- a/tests/queries/0_stateless/01649_with_alias_key_condition.sql
+++ b/tests/queries/0_stateless/01649_with_alias_key_condition.sql
@@ -6,6 +6,6 @@ insert into alias_key_condition values (1, 2), (3, 4);

 set force_primary_key = 1;

-with i as k select * from alias_key_condition where k = 3;
+with i as k select * from alias_key_condition where k = (select i from alias_key_condition where i = 3);

 drop table if exists alias_key_condition;
diff --git a/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql b/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql
index ee2cde963d7..f200ead341b 100644
--- a/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql
+++ b/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql
@@ -40,7 +40,7 @@ SELECT dictGetOrDefault('01681_database_for_cache_dictionary.cache_dictionary_si
 SELECT 'dictHas';
 SELECT dictHas('01681_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01681_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes;
+SELECT * FROM 01681_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes ORDER BY id;

 DROP DICTIONARY 01681_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes;
 DROP TABLE 01681_database_for_cache_dictionary.simple_key_simple_attributes_source_table;
@@ -84,7 +84,7 @@ SELECT dictGetOrDefault('01681_database_for_cache_dictionary.cache_dictionary_si
 SELECT 'dictHas';
 SELECT dictHas('01681_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes', number) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01681_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes;
+SELECT * FROM 01681_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes ORDER BY id;

 DROP DICTIONARY 01681_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes;
 DROP TABLE 01681_database_for_cache_dictionary.simple_key_complex_attributes_source_table;
diff --git a/tests/queries/0_stateless/01682_cache_dictionary_complex_key.sql b/tests/queries/0_stateless/01682_cache_dictionary_complex_key.sql
index 65c56090c47..4cc83412457 100644
--- a/tests/queries/0_stateless/01682_cache_dictionary_complex_key.sql
+++ b/tests/queries/0_stateless/01682_cache_dictionary_complex_key.sql
@@ -42,7 +42,7 @@ SELECT dictGetOrDefault('01682_database_for_cache_dictionary.cache_dictionary_co
 SELECT 'dictHas';
 SELECT dictHas('01682_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01682_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes;
+SELECT * FROM 01682_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes ORDER BY id;

 DROP DICTIONARY 01682_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes;
 DROP TABLE 01682_database_for_cache_dictionary.complex_key_simple_attributes_source_table;
@@ -89,7 +89,7 @@ SELECT dictGetOrDefault('01682_database_for_cache_dictionary.cache_dictionary_co
 SELECT 'dictHas';
 SELECT dictHas('01682_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01682_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes;
+SELECT * FROM 01682_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes ORDER BY id;

 DROP DICTIONARY 01682_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes;
 DROP TABLE 01682_database_for_cache_dictionary.complex_key_complex_attributes_source_table;
diff --git a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sql b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sql
index 3b327257fc4..9dbad1289f1 100644
--- a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sql
+++ b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sql
@@ -40,7 +40,7 @@ SELECT dictGetOrDefault('01684_database_for_cache_dictionary.cache_dictionary_si
 SELECT 'dictHas';
 SELECT dictHas('01684_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01684_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes;
+SELECT * FROM 01684_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes ORDER BY id;

 DROP DICTIONARY 01684_database_for_cache_dictionary.cache_dictionary_simple_key_simple_attributes;
 DROP TABLE 01684_database_for_cache_dictionary.simple_key_simple_attributes_source_table;
@@ -84,7 +84,7 @@ SELECT dictGetOrDefault('01684_database_for_cache_dictionary.cache_dictionary_si
 SELECT 'dictHas';
 SELECT dictHas('01684_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes', number) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01684_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes;
+SELECT * FROM 01684_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes ORDER BY id;

 DROP DICTIONARY 01684_database_for_cache_dictionary.cache_dictionary_simple_key_complex_attributes;
 DROP TABLE 01684_database_for_cache_dictionary.simple_key_complex_attributes_source_table;
diff --git a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sql b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sql
index 1757b136d3e..03a7e1d80df 100644
--- a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sql
+++ b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sql
@@ -42,7 +42,7 @@ SELECT dictGetOrDefault('01685_database_for_cache_dictionary.cache_dictionary_co
 SELECT 'dictHas';
 SELECT dictHas('01685_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01685_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes;
+SELECT * FROM 01685_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes ORDER BY id;

 DROP DICTIONARY 01685_database_for_cache_dictionary.cache_dictionary_complex_key_simple_attributes;
 DROP TABLE 01685_database_for_cache_dictionary.complex_key_simple_attributes_source_table;
@@ -89,10 +89,10 @@ SELECT dictGetOrDefault('01685_database_for_cache_dictionary.cache_dictionary_co
 SELECT 'dictHas';
 SELECT dictHas('01685_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
 SELECT 'select all values as input stream';
-SELECT * FROM 01685_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes;
+SELECT * FROM 01685_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes ORDER BY id;

 DROP DICTIONARY 01685_database_for_cache_dictionary.cache_dictionary_complex_key_complex_attributes;
 DROP TABLE 01685_database_for_cache_dictionary.complex_key_complex_attributes_source_table;

 DROP DATABASE 01685_database_for_cache_dictionary;
-
+
diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference
new file mode 100644
index 00000000000..95479cf37ba
--- /dev/null
+++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference
@@ -0,0 +1,28 @@
+SELECT
+    x,
+    y,
+    z
+FROM prewhere_move_select_final
+PREWHERE y > 100
+SELECT
+    x,
+    y,
+    z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE y > 100
+SELECT
+    x,
+    y,
+    z
+FROM prewhere_move_select_final
+FINAL
+WHERE z > 400
+SELECT
+    x,
+    y,
+    z
+FROM prewhere_move_select_final
+FINAL
+PREWHERE y > 100
+WHERE (y > 100) AND (z > 400)
diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql
new file mode 100644
index 00000000000..a3a882c461a
--- /dev/null
+++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS prewhere_move_select_final;
+CREATE TABLE prewhere_move_select_final (x Int, y Int, z Int) ENGINE = ReplacingMergeTree() ORDER BY (x, y);
+INSERT INTO prewhere_move_select_final SELECT number, number * 2, number * 3 FROM numbers(1000);
+
+-- order key can be pushed down with final
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100;
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100;
+
+-- cannot be pushed down
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400;
+
+-- only y can be pushed down
+EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400;
+
+DROP TABLE prewhere_move_select_final;
diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect
new file mode 100755
index 00000000000..65b9bde235b
--- /dev/null
+++ b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect
@@ -0,0 +1,25 @@
+#!/usr/bin/expect -f
+
+log_user 0
+set timeout 5
+match_max 100000
+# The default timeout action is to do nothing; change it to fail
+expect_after {
+    timeout {
+        exit 2
+    }
+}
+
+set basedir [file dirname $argv0]
+spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
+expect ":) "
+
+# regression for heap-buffer-overflow issue (under ASAN)
+send -- "/**"
+expect "/**"
+# just in case, send a few more bytes
+send -- "foobar"
+expect "/**foobar"
+
+send -- "\3\4"
+expect eof
diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.reference b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01767_timezoneOf.reference b/tests/queries/0_stateless/01767_timezoneOf.reference
new file mode 100644
index 00000000000..0a8a8c32d4e
--- /dev/null
+++ b/tests/queries/0_stateless/01767_timezoneOf.reference
@@ -0,0 +1 @@
+Asia/Tehran Asia/Tehran Asia/Tehran Africa/Accra Pacific/Pitcairn
diff --git a/tests/queries/0_stateless/01767_timezoneOf.sh b/tests/queries/0_stateless/01767_timezoneOf.sh
new file mode 100755
index 00000000000..9dee051ee3f
--- /dev/null
+++ b/tests/queries/0_stateless/01767_timezoneOf.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+TZ=Asia/Tehran $CLICKHOUSE_LOCAL --query "SELECT timezone(), timezoneOf(now()), timeZone(), timeZoneOf(toTimezone(toNullable(now()), 'Africa/Accra')), timeZoneOf(toTimeZone(now64(3), 'Pacific/Pitcairn'))"
diff --git a/tests/queries/0_stateless/01770_add_months_ubsan.reference b/tests/queries/0_stateless/01770_add_months_ubsan.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/01770_add_months_ubsan.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/01770_add_months_ubsan.sql b/tests/queries/0_stateless/01770_add_months_ubsan.sql
new file mode 100644
index 00000000000..039434ff9bc
--- /dev/null
+++ b/tests/queries/0_stateless/01770_add_months_ubsan.sql
@@ -0,0 +1,2 @@
+-- Result does not make sense but UBSan report should not be triggered.
+SELECT ignore(now() + INTERVAL 9223372036854775807 MONTH);
diff --git a/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.reference b/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.reference
new file mode 100644
index 00000000000..1cea52ec1c2
--- /dev/null
+++ b/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.reference
@@ -0,0 +1,2 @@
+2000-01-02 03:04:05 2001-02-03 04:05:06
+2000-01-02 03:04:05 2001-02-03 04:05:06
diff --git a/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.sql b/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.sql
new file mode 100644
index 00000000000..5a1f809b03b
--- /dev/null
+++ b/tests/queries/0_stateless/01773_min_max_time_system_parts_datetime64.sql
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (time DateTime64(3)) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toStartOfInterval(time, INTERVAL 2 YEAR);
+
+INSERT INTO test VALUES ('2000-01-02 03:04:05.123'), ('2001-02-03 04:05:06.789');
+
+SELECT min_time, max_time FROM system.parts WHERE table = 'test' AND database = currentDatabase();
+SELECT min_time, max_time FROM system.parts_columns WHERE table = 'test' AND database = currentDatabase();
+
+DROP TABLE test;
diff --git a/tests/queries/0_stateless/01774_bar_with_illegal_value.reference b/tests/queries/0_stateless/01774_bar_with_illegal_value.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01774_bar_with_illegal_value.sql b/tests/queries/0_stateless/01774_bar_with_illegal_value.sql
new file mode 100644
index 00000000000..60c7f303c13
--- /dev/null
+++ b/tests/queries/0_stateless/01774_bar_with_illegal_value.sql
@@ -0,0 +1 @@
+SELECT greatCircleAngle(1048575, 257, -9223372036854775808, 1048576) - NULL, bar(7, -inf, 1024); -- { serverError 36 }
diff --git a/tests/queries/0_stateless/01774_tuple_null_in.reference b/tests/queries/0_stateless/01774_tuple_null_in.reference
new file mode 100644
index 00000000000..aa47d0d46d4
--- /dev/null
+++ b/tests/queries/0_stateless/01774_tuple_null_in.reference
@@ -0,0 +1,2 @@
+0
+0
diff --git a/tests/queries/0_stateless/01774_tuple_null_in.sql b/tests/queries/0_stateless/01774_tuple_null_in.sql
new file mode 100644
index 00000000000..a9cc39e8840
--- /dev/null
+++ b/tests/queries/0_stateless/01774_tuple_null_in.sql
@@ -0,0 +1,2 @@
+SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1);
+SELECT (NULL, NULL) IN ((NULL, 0), (3, 1), (3, 2), (8, 0), (NULL, NULL));
diff --git a/tests/queries/0_stateless/01776_decrypt_aead_size_check.reference b/tests/queries/0_stateless/01776_decrypt_aead_size_check.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01776_decrypt_aead_size_check.sql b/tests/queries/0_stateless/01776_decrypt_aead_size_check.sql
new file mode 100644
index 00000000000..8730ed0eda2
--- /dev/null
+++ b/tests/queries/0_stateless/01776_decrypt_aead_size_check.sql
@@ -0,0 +1 @@
+SELECT decrypt('aes-128-gcm', 'text', 'key', 'IV'); -- { serverError 36 }
diff --git a/tests/queries/0_stateless/01777_map_populate_series_ubsan.reference b/tests/queries/0_stateless/01777_map_populate_series_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01777_map_populate_series_ubsan.sql b/tests/queries/0_stateless/01777_map_populate_series_ubsan.sql
new file mode 100644
index 00000000000..5a8c182425a
--- /dev/null
+++ b/tests/queries/0_stateless/01777_map_populate_series_ubsan.sql
@@ -0,0 +1,2 @@
+-- Should correctly throw an exception about overflow:
+SELECT mapPopulateSeries([-9223372036854775808, toUInt32(2)], [toUInt32(1023), -1]); -- { serverError 128 }
diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 0135fc6437a..1b333a6baec 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -212,6 +212,7 @@
 01017_uniqCombined_memory_usage
 01747_join_view_filter_dictionary
 01748_dictionary_table_dot
+01755_client_highlight_multi_line_comment_regression
 00950_dict_get
 01683_flat_dictionary
 01681_cache_dictionary_simple_key
diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.reference b/tests/queries/1_stateful/00162_mmap_compression_none.reference
new file mode 100644
index 00000000000..3495cc537c1
--- /dev/null
+++ b/tests/queries/1_stateful/00162_mmap_compression_none.reference
@@ -0,0 +1 @@
+687074654
diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql
new file mode 100644
index 00000000000..2178644214a
--- /dev/null
+++ b/tests/queries/1_stateful/00162_mmap_compression_none.sql
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS hits_none;
+CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO hits_none SELECT Title FROM test.hits;
+
+SET min_bytes_to_use_mmap_io = 1;
+SELECT sum(length(Title)) FROM hits_none;
+
+DROP TABLE hits_none;
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 981cf69d676..77d4a9b8499 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -95,7 +95,8 @@
         "01370_client_autocomplete_word_break_characters",
         "01676_clickhouse_client_autocomplete",
         "01193_metadata_loading",
-        "01455_time_zones"
+        "01455_time_zones",
+        "01755_client_highlight_multi_line_comment_regression"
     ],
     "release-build": [
     ],
@@ -582,6 +583,7 @@
         "00980_zookeeper_merge_tree_alter_settings",
         "00988_constraints_replication_zookeeper",
"00989_parallel_parts_loading", + "00992_system_parts_race_condition_zookeeper_long", "00993_system_parts_race_condition_drop_zookeeper", "01012_show_tables_limit", "01013_sync_replica_timeout_zookeeper", diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index d534fd8fd4f..d3e1c2acd69 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -21,6 +21,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (corrector_utf8) add_subdirectory (zookeeper-cli) add_subdirectory (zookeeper-test) + add_subdirectory (nukeeper-data-dumper) add_subdirectory (zookeeper-dump-tree) add_subdirectory (zookeeper-remove-by-list) add_subdirectory (zookeeper-create-entry-to-download-part) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 231d22b50da..799492cdd90 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,7 @@ +v21.3.4.25-lts 2021-03-28 v21.3.3.14-lts 2021-03-19 v21.3.2.5-lts 2021-03-12 +v21.2.7.11-stable 2021-03-28 v21.2.6.1-stable 2021-03-15 v21.2.5.5-stable 2021-03-02 v21.2.4.6-stable 2021-02-20 diff --git a/utils/nukeeper-data-dumper/CMakeLists.txt b/utils/nukeeper-data-dumper/CMakeLists.txt new file mode 100644 index 00000000000..bab1137bf4d --- /dev/null +++ b/utils/nukeeper-data-dumper/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(nukeeper-data-dumper main.cpp) +target_link_libraries(nukeeper-data-dumper PRIVATE dbms) diff --git a/utils/nukeeper-data-dumper/main.cpp b/utils/nukeeper-data-dumper/main.cpp new file mode 100644 index 00000000000..c80aeb473e2 --- /dev/null +++ b/utils/nukeeper-data-dumper/main.cpp @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include // Y_IGNORE +#include +#include +#include + +using namespace Coordination; +using namespace DB; + +void dumpMachine(std::shared_ptr machine) +{ + auto & storage = machine->getStorage(); + std::queue keys; + keys.push("/"); + + while (!keys.empty()) + { + auto key = keys.front(); + keys.pop(); + std::cout << key << "\n"; + auto value = storage.container.getValue(key); + std::cout << "\tStat: {version: " << value.stat.version << + ", mtime: " << value.stat.mtime << + ", emphemeralOwner: " << value.stat.ephemeralOwner << + ", czxid: " << value.stat.czxid << + ", mzxid: " << value.stat.mzxid << + ", numChildren: " << value.stat.numChildren << + ", dataLength: " << value.stat.dataLength << + "}" << std::endl; + std::cout << "\tData: " << storage.container.getValue(key).data << std::endl; + + for (const auto & child : value.children) + { + if (key == "/") + keys.push(key + child); + else + keys.push(key + "/" + child); + } + } + std::cout << std::flush; +} + +int main(int argc, char *argv[]) +{ + if (argc != 3) + { + std::cerr << "usage: " << argv[0] << " snapshotpath logpath" << std::endl; + return 3; + } + else + { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + Poco::Logger::root().setLevel("trace"); + } + auto * logger = &Poco::Logger::get("nukeeper-dumper"); + ResponsesQueue queue; + SnapshotsQueue snapshots_queue{1}; + CoordinationSettingsPtr settings = std::make_shared(); + auto state_machine = std::make_shared(queue, snapshots_queue, argv[1], settings); + state_machine->init(); + size_t last_commited_index = state_machine->last_commit_index(); + + LOG_INFO(logger, "Last committed index: {}", last_commited_index); + + DB::NuKeeperLogStore changelog(argv[2], 10000000, true); + changelog.init(last_commited_index, 
10000000000UL); /// collect all logs + if (changelog.size() == 0) + LOG_INFO(logger, "Changelog empty"); + else + LOG_INFO(logger, "Last changelog entry {}", changelog.next_slot() - 1); + + for (size_t i = last_commited_index + 1; i < changelog.next_slot(); ++i) + { + if (changelog.entry_at(i)->get_val_type() == nuraft::log_val_type::app_log) + state_machine->commit(i, changelog.entry_at(i)->get_buf()); + } + + dumpMachine(state_machine); + + return 0; +} diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 92da6328f0f..a57930b279d 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -75,6 +75,7 @@ Results for Raspberry Pi and Digital Ocean CPU-optimized are from Fritz Wijay Results for Digitalocean (Storage-intesinve VMs) + (CPU/GP) are from Yiğit Konur and Metehan Çetinkaya of seo.do.
 Results for 2x AMD EPYC 7F72 3.2 Ghz (Total 96 Cores, IBM Cloud's Bare Metal Service) from Yiğit Konur and Metehan Çetinkaya of seo.do.
 Results for 2x AMD EPYC 7742 (128 physical cores, 1 TB DDR4-3200 RAM) from Yedige Davletgaliyev and Nikita Zhavoronkov of blockchair.com.
+Results for ASUS A15 (Ryzen laptop) are from Kimmo Linna.

diff --git a/website/benchmark/hardware/results/asus_a15.json b/website/benchmark/hardware/results/asus_a15.json
new file mode 100644
index 00000000000..983dbde8681
--- /dev/null
+++ b/website/benchmark/hardware/results/asus_a15.json
@@ -0,0 +1,54 @@
+[
+    {
+        "system": "Asus A15",
+        "system_full": "Asus A15 (16 × AMD Ryzen 7 4800H, 16 GiB RAM)",
+        "time": "2021-03-23 00:00:00",
+        "kind": "laptop",
+        "result":
+        [
+[0.004, 0.003, 0.003],
+[0.019, 0.013, 0.012],
+[0.053, 0.041, 0.037],
+[0.106, 0.057, 0.056],
+[0.158, 0.115, 0.110],
+[0.324, 0.266, 0.262],
+[0.027, 0.024, 0.026],
+[0.017, 0.016, 0.017],
+[0.644, 0.589, 0.582],
+[0.733, 0.679, 0.679],
+[0.233, 0.201, 0.197],
+[0.276, 0.235, 0.236],
+[1.025, 0.962, 0.962],
+[1.342, 1.270, 1.264],
+[1.170, 1.129, 1.124],
+[1.375, 1.346, 1.351],
+[3.271, 3.210, 3.242],
+[1.960, 1.898, 1.907],
+[5.997, 5.965, 5.983],
+[0.106, 0.065, 0.055],
+[1.264, 0.990, 0.989],
+[1.555, 1.241, 1.239],
+[3.798, 3.307, 3.280],
+[1.949, 1.022, 0.995],
+[0.393, 0.292, 0.292],
+[0.307, 0.254, 0.255],
+[0.378, 0.297, 0.290],
+[1.632, 1.399, 1.386],
+[2.111, 1.909, 1.900],
+[3.349, 3.352, 3.357],
+[0.892, 0.824, 0.816],
+[1.505, 1.392, 1.378],
+[9.105, 8.951, 8.914],
+[5.195, 4.975, 4.919],
+[5.150, 5.021, 4.955],
+[1.756, 1.743, 1.749],
+[0.161, 0.154, 0.158],
+[0.108, 0.058, 0.055],
+[0.101, 0.102, 0.052],
+[0.365, 0.309, 0.334],
+[0.050, 0.023, 0.023],
+[0.037, 0.019, 0.015],
+[0.023, 0.013, 0.018]
+        ]
+    }
+]