Merge branch 'master' into fix_create_drop_replica_race

2024-11-23 16:12:01 +00:00 · 2021-04-21 13:03:29 +00:00 · 2021-04-21 13:03:29 +00:00 · 7da7279390
commit 7da7279390
parent 34b30d80d4 20a5fed53e
266 changed files with 5648 additions and 1346 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -516,9 +516,9 @@ include (cmake/find/fast_float.cmake)
 include (cmake/find/rapidjson.cmake)
 include (cmake/find/fastops.cmake)
 include (cmake/find/odbc.cmake)
+include (cmake/find/nanodbc.cmake)
 include (cmake/find/rocksdb.cmake)
 include (cmake/find/libpqxx.cmake)
-include (cmake/find/nanodbc.cmake)
 include (cmake/find/nuraft.cmake)


--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@ -25,7 +25,7 @@


 #if defined(__PPC__)
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
 #endif
@ -1266,7 +1266,7 @@ public:
 };

 #if defined(__PPC__)
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
 #endif
--- a/base/mysqlxx/Pool.h
+++ b/base/mysqlxx/Pool.h
@ -159,9 +159,9 @@ public:
      */
    Pool(const std::string & db_,
         const std::string & server_,
-         const std::string & user_ = "",
-         const std::string & password_ = "",
-         unsigned port_ = 0,
+         const std::string & user_,
+         const std::string & password_,
+         unsigned port_,
         const std::string & socket_ = "",
         unsigned connect_timeout_ = MYSQLXX_DEFAULT_TIMEOUT,
         unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54450)
+SET(VERSION_REVISION 54451)
 SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 5)
+SET(VERSION_MINOR 6)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
-SET(VERSION_DESCRIBE v21.5.1.1-prestable)
-SET(VERSION_STRING 21.5.1.1)
+SET(VERSION_GITHASH 96fced4c3cf432fb0b401d2ab01f0c56e5f74a96)
+SET(VERSION_DESCRIBE v21.6.1.1-prestable)
+SET(VERSION_STRING 21.6.1.1)
 # end of autochange
--- a/cmake/find/nanodbc.cmake
+++ b/cmake/find/nanodbc.cmake
@ -1,35 +1,16 @@
-option(ENABLE_NANODBC "Enalbe nanodbc" ${ENABLE_LIBRARIES})
-
-if (NOT ENABLE_NANODBC)
-    set (USE_ODBC 0)
-    return()
-endif()
-
 if (NOT ENABLE_ODBC)
-    set (USE_NANODBC 0)
-    message (STATUS "Using nanodbc=${USE_NANODBC}")
-    return()
-endif()
+    return ()
+endif ()
+
+if (NOT USE_INTERNAL_NANODBC_LIBRARY)
+    message (FATAL_ERROR "Only the bundled nanodbc library can be used")
+endif ()

 if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt")
-    message (WARNING "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
-    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal nanodbc library")
-    set (USE_NANODBC 0)
-    return()
+    message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
 endif()

-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/include")
-    message (ERROR "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive")
-    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal unixodbc needed for nanodbc")
-    set (USE_NANODBC 0)
-    return()
-endif()
-
-set (USE_NANODBC 1)
-
 set (NANODBC_LIBRARY nanodbc)
+set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbc")

-set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbce")
-
-message (STATUS "Using nanodbc=${USE_NANODBC}: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
-message (STATUS "Using unixodbc")
+message (STATUS "Using nanodbc: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
--- a/cmake/find/odbc.cmake
+++ b/cmake/find/odbc.cmake
@ -50,4 +50,6 @@ if (NOT EXTERNAL_ODBC_LIBRARY_FOUND)
    set (USE_INTERNAL_ODBC_LIBRARY 1)
 endif ()

+set (USE_INTERNAL_NANODBC_LIBRARY 1)
+
 message (STATUS "Using unixodbc")
--- a/cmake/warnings.cmake
+++ b/cmake/warnings.cmake
@ -171,6 +171,7 @@ elseif (COMPILER_GCC)
    add_cxx_compile_options(-Wtrampolines)
    # Obvious
    add_cxx_compile_options(-Wunused)
+    add_cxx_compile_options(-Wundef)
    # Warn if vector operation is not implemented via SIMD capabilities of the architecture
    add_cxx_compile_options(-Wvector-operation-performance)
    # XXX: libstdc++ has some of these for 3way compare
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -47,6 +47,7 @@ add_subdirectory (lz4-cmake)
 add_subdirectory (murmurhash)
 add_subdirectory (replxx-cmake)
 add_subdirectory (unixodbc-cmake)
+add_subdirectory (nanodbc-cmake)

 if (USE_INTERNAL_XZ_LIBRARY)
    add_subdirectory (xz)
@ -320,10 +321,6 @@ if (USE_LIBPQXX)
    add_subdirectory (libpqxx-cmake)
 endif()

-if (USE_NANODBC)
-    add_subdirectory (nanodbc-cmake)
-endif()
-
 if (USE_NURAFT)
  add_subdirectory(nuraft-cmake)
 endif()
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit d2feb5978b979729a07c3ca76eaa4ab94cef4ceb
+Subproject commit 377f8e77491d9f66ce8e32e88aae19dffe8dc4d7
--- a/contrib/nanodbc-cmake/CMakeLists.txt
+++ b/contrib/nanodbc-cmake/CMakeLists.txt
@ -1,3 +1,7 @@
+if (NOT USE_INTERNAL_NANODBC_LIBRARY)
+    return ()
+endif ()
+
 set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/nanodbc)

 if (NOT TARGET unixodbc)
--- a/contrib/zlib-ng
+++ b/contrib/zlib-ng
@ -1 +1 @@
-Subproject commit 7f254522fd676ff4e906c6d4e9b30d4df4214c2d
+Subproject commit 5cc4d232020dc66d1d6c5438834457e2a2f6127b
--- a/debian/changelog
+++ b/debian/changelog
@ -1,5 +1,5 @@
-clickhouse (21.5.1.1) unstable; urgency=low
+clickhouse (21.6.1.1) unstable; urgency=low

  * Modified source code

- -- clickhouse-release <clickhouse-release@yandex-team.ru>  Fri, 02 Apr 2021 18:34:26 +0300
+ -- clickhouse-release <clickhouse-release@yandex-team.ru>  Tue, 20 Apr 2021 01:48:16 +0300
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*

 RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:20.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*
 ARG gosu_ver=1.10

 # set non-empty deb_location_url url to create a docker image
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*

 RUN apt-get update && \
    apt-get install -y apt-transport-https dirmngr && \
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -308,10 +308,8 @@ function run_tests
        01354_order_by_tuple_collate_const
        01355_ilike
        01411_bayesian_ab_testing
-        01532_collate_in_low_cardinality
-        01533_collate_in_nullable
-        01542_collate_in_array
-        01543_collate_in_tuple
+        collate
+        collation
        _orc_
        arrow
        avro
--- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
+++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
@ -17,6 +17,9 @@

            <!-- One NUMA node w/o hyperthreading -->
            <max_threads>12</max_threads>
+
+            <!-- mmap shows some improvements in perf tests -->
+            <min_bytes_to_use_mmap_io>64Mi</min_bytes_to_use_mmap_io>
        </default>
    </profiles>
    <users>
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -66,7 +66,12 @@ reportStageEnd('parse')
 subst_elems = root.findall('substitutions/substitution')
 available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
 for e in subst_elems:
-    available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
+    name = e.find('name').text
+    values = [v.text for v in e.findall('values/value')]
+    if not values:
+        raise Exception(f'No values given for substitution {{{name}}}')
+
+    available_parameters[name] = values

 # Takes parallel lists of templates, substitutes them with all combos of
 # parameters. The set of parameters is determined based on the first list.
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -104,6 +104,12 @@ clickhouse-client -q "system flush logs" ||:
 pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
 clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
 clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
+clickhouse-client --allow_introspection_functions=1 -q "
+    WITH
+        arrayMap(x -> concat(demangle(addressToSymbol(x)), ':', addressToLine(x)), trace) AS trace_array,
+        arrayStringConcat(trace_array, '\n') AS trace_string
+    SELECT * EXCEPT(trace), trace_string FROM system.trace_log FORMAT TSVWithNamesAndTypes
+" | pigz > /test_output/trace-log.tsv.gz &
 wait ||:

 mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -136,6 +136,7 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
 mv /var/log/clickhouse-server/stderr.log /test_output/
 tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
+tar -chf /test_output/trace_log_dump.tar /var/lib/clickhouse/data/system/trace_log ||:

 # Write check result into check_status.tsv
 clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@ -19,26 +19,26 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
 -   `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
 -   `format` — The [format](../../../interfaces/formats.md#formats) of the file.
 -   `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
-   `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
+-   `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.

-**Example:**
+**Example**

-**1.** Set up the `s3_engine_table` table:
+1. Set up the `s3_engine_table` table:

-```sql
-CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
+``` sql
+CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
 ```

-**2.** Fill file:
+2. Fill file:

-```sql
-INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
+``` sql
+INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
 ```

-**3.** Query the data:
+3. Query the data:

-```sql
-SELECT * FROM s3_engine_table LIMIT 2
+``` sql
+SELECT * FROM s3_engine_table LIMIT 2;
 ```

 ```text
@ -73,13 +73,63 @@ For more information about virtual columns see [here](../../../engines/table-eng

 Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.

-## S3-related Settings {#s3-settings}
+**Example**
+
+1. Suppose we have several files in CSV format with the following URIs on S3:
+
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’
+-   ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’
+
+There are several ways to make a table consisting of all six files:
+
+The first way:
+
+``` sql
+CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
+```
+
+Another way:
+
+``` sql
+CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
+```
+
+The table consists of all the files in both directories (all files must match the format and schema described in the query):
+
+``` sql
+CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
+```
+
+If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+
+**Example**
+
+Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+
+``` sql
+CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
+```
+
+## Virtual Columns {#virtual-columns}
+
+-   `_path` — Path to the file.
+-   `_file` — Name of the file.
+
+**See Also**
+
+-   [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
+
+## S3-related settings {#settings}

 The following settings can be set before query execution or placed into configuration file.

-   `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`. 
+-   `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
 -   `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
-   `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. 
+-   `s3_max_redirects` — Maximum number of S3 redirect hops allowed. Default value is `10`.

 Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.

@ -90,6 +140,7 @@ The following settings can be specified in configuration file for given endpoint
 -   `endpoint` — Specifies prefix of an endpoint. Mandatory.
 -   `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
 -   `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
+-   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
 -   `header` —  Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times.
 -   `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.

@ -102,11 +153,13 @@ The following settings can be specified in configuration file for given endpoint
        <!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
        <!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
        <!-- <use_environment_credentials>false</use_environment_credentials> -->
+        <!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
        <!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
        <!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
    </endpoint-name>
 </s3>
 ```
+
 ## Usage {#usage-examples}

 Suppose we have several files in TSV format with the following URIs on HDFS:
@ -149,8 +202,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p
 CREATE TABLE big_table (name String, value UInt32) 
 ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
 ```
+
 ## See also

 -  [S3 table function](../../../sql-reference/table-functions/s3.md)
-
-[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) <!--hide-->
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -767,6 +767,7 @@ Required parameters:

 Optional parameters:    
 -   `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
+-   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
 -   `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. 
 -   `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. 
 -   `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. 
--- a/docs/en/guides/apply-catboost-model.md
+++ b/docs/en/guides/apply-catboost-model.md
@ -159,6 +159,9 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.<so|dl
 <models_config>/home/catboost/models/*_model.xml</models_config>
 ```

+!!! note "Note"
+    You can change the path to the CatBoost model configuration later without restarting the server.
+
 ## 4. Run the Model Inference from SQL {#run-model-inference}

 For test model run the ClickHouse client `$ clickhouse client`.
--- a/docs/en/interfaces/third-party/gui.md
+++ b/docs/en/interfaces/third-party/gui.md
@ -169,24 +169,21 @@ Features:

 ### SeekTable {#seektable}

-[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. SeekTable is available both as a cloud service and a self-hosted version. SeekTable reports may be embedded into any web-app.
+[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. It is available both as a cloud service and a self-hosted version. Reports from SeekTable may be embedded into any web-app.

 Features:

 -   Business users-friendly reports builder.
 -   Powerful report parameters for SQL filtering and report-specific query customizations.
 -   Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers).
-   It is possible to use all power of CH SQL dialect in dimensions/measures definitions
+-   It is possible to use the full power of the ClickHouse SQL dialect in dimensions/measures definitions.
 -   [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation.
-   Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore), data models (cubes) / reports configuration is a human-readable XML and can be stored under version control.
+-   Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored in a version control system.

 SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual usage.

 [How to configure ClickHouse connection in SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)

-
 ### Chadmin {#chadmin}

 [Chadmin](https://github.com/bun4uk/chadmin) is a simple UI where you can visualize your currently running queries on your ClickHouse cluster and info about them and kill them if you want.
-
-[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/gui/) <!--hide-->
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@ -77,7 +77,8 @@ toc_title: Adopters
 | <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
 | <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
 | <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
-| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x
+| <a href="https://clarity.microsoft.com/" class="favicon">Microsoft</a> | Web Analytics | Clarity (Main Product) | — | — | [A question on GitHub](https://github.com/ClickHouse/ClickHouse/issues/21556) |
+| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |
 | <a href="https://mux.com/" class="favicon">MUX</a> | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) |
 | <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
 | <a href="https://www.netskope.com/" class="favicon">Netskope</a> | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) |
--- a/docs/en/operations/system-tables/columns.md
+++ b/docs/en/operations/system-tables/columns.md
@ -4,7 +4,9 @@ Contains information about columns in all the tables.

 You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.

-The `system.columns` table contains the following columns (the column type is shown in brackets):
+Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the session where they were created. They are shown with an empty `database` field.
+
+Columns:

 -   `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -26,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
 **Example**

 ```sql
-:) select * from system.columns LIMIT 2 FORMAT Vertical;
+SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
 ```

 ```text
@ -65,8 +67,6 @@ is_in_sorting_key:       0
 is_in_primary_key:       0
 is_in_sampling_key:      0
 compression_codec:       
-
-2 rows in set. Elapsed: 0.002 sec. 
 ```

 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->
--- a/docs/en/operations/system-tables/tables.md
+++ b/docs/en/operations/system-tables/tables.md
@ -1,59 +1,65 @@
 # system.tables {#system-tables}

-Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
+Contains metadata of each table that the server knows about. 

-This table contains the following columns (the column type is shown in brackets):
+[Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.

-   `database` (String) — The name of the database the table is in.
+[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.tables` only in the session where they were created. They are shown with an empty `database` field and with the `is_temporary` flag switched on.

-   `name` (String) — Table name.
+Columns:

-   `engine` (String) — Table engine name (without parameters).
+-   `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. 

-   `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
+-   `name` ([String](../../sql-reference/data-types/string.md)) — Table name. 

-   `data_path` (String) - Path to the table data in the file system.
+-   `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). 

-   `metadata_path` (String) - Path to the table metadata in the file system.
+-   `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. 

-   `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
+-   `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system. 

-   `dependencies_database` (Array(String)) - Database dependencies.
+-   `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. 

-   `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
+-   `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata.

-   `create_table_query` (String) - The query that was used to create the table.
+-   `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.

-   `engine_full` (String) - Parameters of the table engine.
+-   `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).

-   `partition_key` (String) - The partition key expression specified in the table.
+-   `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.

-   `sorting_key` (String) - The sorting key expression specified in the table.
+-   `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. 

-   `primary_key` (String) - The primary key expression specified in the table.
+-   `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. 

-   `sampling_key` (String) - The sampling key expression specified in the table.
+-   `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. 

-   `storage_policy` (String) - The storage policy:
+-   `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. 
+
+-   `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. 
+
+-   `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy:

    -   [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    -   [Distributed](../../engines/table-engines/special/distributed.md#distributed)

-   `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `Null` (including underying `Buffer` table).
+-   `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).

-   `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `Null` (**does not** includes any underlying storage).
+-   `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).

    -   If the table stores data on disk, returns used space on disk (i.e. compressed).
    -   If the table stores data in memory, returns approximated number of used bytes in memory.

-   `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
+-   `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). 

-   `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
+-   `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). 

 The `system.tables` table is used in `SHOW TABLES` query implementation.

+**Example**
+
 ```sql
-:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
+SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
 ```

 ```text
@ -100,8 +106,6 @@ sampling_key:
 storage_policy:             
 total_rows:                 ᴺᵁᴸᴸ
 total_bytes:                ᴺᵁᴸᴸ
-
-2 rows in set. Elapsed: 0.004 sec. 
 ```

 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@ -29,6 +29,3 @@ $ sudo apt-get update
 $ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
 $ sudo service clickhouse-server restart
 ```
-
-
-
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@ -27,7 +27,37 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘a

 ## -SimpleState {#agg-functions-combinator-simplestate}

-If you apply this combinator, the aggregate function returns the same value but with a different type. This is an `SimpleAggregateFunction(...)` that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines.
+If you apply this combinator, the aggregate function returns the same value but with a different type. This is a [SimpleAggregateFunction(...)](../../sql-reference/data-types/simpleaggregatefunction.md) that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) tables.
+
+**Syntax**
+
+``` sql
+<aggFunction>SimpleState(x)
+```
+
+**Arguments**
+
+-   `x` — Aggregate function parameters.
+
+**Returned values**
+
+The value of an aggregate function with the `SimpleAggregateFunction(...)` type.
+
+**Example**
+
+Query:
+
+``` sql
+WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
+```
+
+Result:
+
+``` text
+┌─toTypeName(c)────────────────────────┬─c─┐
+│ SimpleAggregateFunction(any, UInt64) │ 0 │
+└──────────────────────────────────────┴───┘
+```

 ## -State {#agg-functions-combinator-state}

@ -249,4 +279,3 @@ FROM people
 └────────┴───────────────────────────┘
 ```

-
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
@ -6,7 +6,7 @@ toc_priority: 207

 Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm.

-The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
+Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.

 The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.

--- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md
+++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md
@ -2,6 +2,8 @@

 `SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data.

+The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix.
+
 The following aggregate functions are supported:

 -   [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
--- a/docs/en/sql-reference/functions/json-functions.md
+++ b/docs/en/sql-reference/functions/json-functions.md
@ -16,46 +16,60 @@ The following assumptions are made:

 ## visitParamHas(params, name) {#visitparamhasparams-name}

-Checks whether there is a field with the ‘name’ name.
+Checks whether there is a field with the `name` name.
+
+Alias: `simpleJSONHas`.

 ## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}

-Parses UInt64 from the value of the field named ‘name’. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
+Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
+
+Alias: `simpleJSONExtractUInt`.

 ## visitParamExtractInt(params, name) {#visitparamextractintparams-name}

 The same as for Int64.

+Alias: `simpleJSONExtractInt`.
+
 ## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}

 The same as for Float64.

+Alias: `simpleJSONExtractFloat`.
+
 ## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}

 Parses a true/false value. The result is UInt8.

+Alias: `simpleJSONExtractBool`.
+
 ## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}

 Returns the value of a field, including separators.

+Alias: `simpleJSONExtractRaw`.
+
 Examples:

 ``` sql
-visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
-visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
+visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
+visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
 ```

 ## visitParamExtractString(params, name) {#visitparamextractstringparams-name}

 Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.

+Alias: `simpleJSONExtractString`.
+
 Examples:

 ``` sql
-visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
-visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
-visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
-visitParamExtractString('{"abc":"hello}', 'abc') = ''
+visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
+visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
+visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
+visitParamExtractString('{"abc":"hello}', 'abc') = '';
 ```

 There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -74,6 +74,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,

 Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.

+!!! warning "Warning"
+    You can’t delete a column if it is referenced by a [materialized view](../../../sql-reference/statements/create/view.md#materialized). An attempt to do so returns an error.
+
 Example:

 ``` sql
@ -180,7 +183,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
 ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 ```

-## See Also
+**See Also**

 - [REMOVE TTL](ttl.md).

--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@ -50,15 +50,32 @@ Creates a table with the same result as that of the [table function](../../../sq
 ### From SELECT query {#from-select-query}

 ``` sql
-CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
+CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
 ```

-Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from SELECT.
+Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from `SELECT`. You can also explicitly specify the column descriptions.

-In all cases, if `IF NOT EXISTS` is specified, the query won’t return an error if the table already exists. In this case, the query won’t do anything.
+If the table already exists and `IF NOT EXISTS` is specified, the query won’t do anything.

 There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines).

+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
+SELECT x, toTypeName(x) FROM t1;
+```
+
+Result:
+
+```text
+┌─x─┬─toTypeName(x)─┐
+│ 1 │ String        │
+└───┴───────────────┘
+```
+
 ## NULL Or NOT NULL Modifiers {#null-modifiers}

 `NULL` and `NOT NULL` modifiers after data type in column definition allow or do not allow it to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@ -5,13 +5,18 @@ toc_title: OPTIMIZE

 # OPTIMIZE Statement {#misc_operations-optimize}

+This query tries to initialize an unscheduled merge of data parts for tables.
+
+!!! warning "Warning"
+    `OPTIMIZE` can’t fix the `Too many parts` error.
+
+**Syntax**
+
 ``` sql
 OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
 ```

-This query tries to initialize an unscheduled merge of data parts for tables with a table engine from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family.
-
-The `OPTMIZE` query is also supported for the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.
+The `OPTIMIZE` query is supported for the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.

 When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled).

@ -21,12 +26,13 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin
 -   If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.


-### BY expression {#by-expression}
+## BY expression {#by-expression}

 If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).

-Note that `*` behaves just like in `SELECT`: `MATERIALIZED`, and `ALIAS` columns are not used for expansion.
-Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an ALIAS column.
+!!! note "Note"
+    Notice that `*` behaves just like in `SELECT`: `MATERIALIZED` and `ALIAS` columns are not used for expansion.
+    Also, it is an error to specify an empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an ALIAS column.

 ``` sql
 OPTIMIZE TABLE table DEDUPLICATE; -- the old one
@ -39,9 +45,10 @@ OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT co
 OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT (colX, colY);
 ```

-**Example:**
+**Examples**
+
+Create a table:

-A silly synthetic table.
 ``` sql
 CREATE TABLE example (
    primary_key Int32,
@ -56,31 +63,31 @@ PARTITION BY partition_key
 ORDER BY (primary_key, secondary_key);
 ```

+The 'old' deduplicate: all columns are taken into account, i.e. a row is removed only if all values in all columns are equal to the corresponding values in the previous row.
+
 ``` sql
-- The 'old' deduplicate, all columns are taken into account, i.e. row is removed only if all values in all columns are equal to corresponding values in previous row.
 OPTIMIZE TABLE example FINAL DEDUPLICATE;
 ```

+Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
+
 ``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
 ```

+Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
+
 ``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
 ```

+Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
 ``` sql
-- Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
 ```

+Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
+
 ``` sql
-- Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
 ```
-
-
-!!! warning "Warning"
-    `OPTIMIZE` can’t fix the “Too many parts” error.
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@ -18,7 +18,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
 -   `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 -   `format` — The [format](../../interfaces/formats.md#formats) of the file.
 -   `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
-   `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
+-   `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.

 **Returned value**

--- a/docs/ru/commercial/cloud.md
+++ b/docs/ru/commercial/cloud.md
@ -39,4 +39,20 @@ toc_title: "Поставщики облачных услуг ClickHouse"
 -   поддержка прав доступа, one-key восстановления, многоуровневая защита сети, шифрование облачного диска;
 -   полная интеграция с облачными системами логирования, базами данных и инструментами обработки данных;
 -   встроенная платформа для мониторинга и управления базами данных;
-   техническая поддержка от экспертов по работе с базами данных.
+-   техническая поддержка от экспертов по работе с базами данных.
+
+## SberCloud {#sbercloud}
+
+[Облачная платформа SberCloud.Advanced](https://sbercloud.ru/ru/advanced):
+
+-   предоставляет более 50 высокотехнологичных сервисов;
+-   позволяет быстро создавать и эффективно управлять ИТ-инфраструктурой, приложениями и интернет-сервисами;
+-   радикально минимизирует ресурсы, требуемые для работы корпоративных ИТ-систем;
+-   в разы сокращает время вывода новых продуктов на рынок.
+
+SberCloud.Advanced предоставляет [MapReduce Service (MRS)](https://docs.sbercloud.ru/mrs/ug/topics/ug__clickhouse.html) — надежную, безопасную и простую в использовании платформу корпоративного уровня для хранения, обработки и анализа больших данных. MRS позволяет быстро создавать и управлять кластерами ClickHouse.
+
+-   Инстанс ClickHouse состоит из трех узлов ZooKeeper и нескольких узлов ClickHouse. Выделенный режим реплики используется для обеспечения высокой надежности двойных копий данных.
+-   MRS предлагает возможности гибкого масштабирования при быстром росте сервисов в сценариях, когда емкости кластерного хранилища или вычислительных ресурсов процессора недостаточно. MRS в один клик предоставляет инструмент для балансировки данных при расширении узлов ClickHouse в кластере. Вы можете определить режим и время балансировки данных на основе характеристик сервиса, чтобы обеспечить доступность сервиса.
+-   MRS использует архитектуру развертывания высокой доступности на основе Elastic Load Balance (ELB) — сервиса для автоматического распределения трафика на несколько внутренних узлов. Благодаря ELB, данные записываются в локальные таблицы и считываются из распределенных таблиц на разных узлах. Такая архитектура повышает отказоустойчивость кластера и гарантирует высокую доступность приложений.
+
--- a/docs/ru/engines/table-engines/index.md
+++ b/docs/ru/engines/table-engines/index.md
@ -48,6 +48,14 @@ toc_title: "Введение"

 Движки семейства:

+-   [Kafka](integrations/kafka.md#kafka)
+-   [MySQL](integrations/mysql.md#mysql)
+-   [ODBC](integrations/odbc.md#table-engine-odbc)
+-   [JDBC](integrations/jdbc.md#table-engine-jdbc)
+-   [S3](integrations/s3.md#table-engine-s3)
+
+### Специальные движки {#spetsialnye-dvizhki}
+
 -   [ODBC](../../engines/table-engines/integrations/odbc.md)
 -   [JDBC](../../engines/table-engines/integrations/jdbc.md)
 -   [MySQL](../../engines/table-engines/integrations/mysql.md)
@ -84,4 +92,3 @@ toc_title: "Введение"
 Чтобы получить данные из виртуального столбца, необходимо указать его название в запросе `SELECT`. `SELECT *` не отображает данные из виртуальных столбцов.

 При создании таблицы со столбцом, имя которого совпадает с именем одного из виртуальных столбцов таблицы, виртуальный столбец становится недоступным. Не делайте так. Чтобы помочь избежать конфликтов, имена виртуальных столбцов обычно предваряются подчеркиванием.
-
--- a/docs/ru/engines/table-engines/integrations/s3.md
+++ b/docs/ru/engines/table-engines/integrations/s3.md
@ -19,7 +19,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
 -   `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотри [ниже](#wildcards-in-path).
 -   `format` — [формат](../../../interfaces/formats.md#formats) файла.
 -   `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
-   `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла. 
+-   `compression` — тип сжатия. Возможные значения: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла. 

 **Пример**

@ -73,17 +73,17 @@ SELECT * FROM s3_engine_table LIMIT 2;

 Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.

-## Настройки конечных точек {#endpoint-settings}
+## Настройки точки приема запроса {#endpoint-settings}

-Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
+Для точки приема запроса (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:

 Обязательная настройка:
-   `endpoint` — указывает префикс конечной точки. 
+-   `endpoint` — указывает префикс точки приема запроса. 

 Необязательные настройки:
-   `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой.
-   `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`.
-   `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз.
+-   `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной точкой приема запроса.
+-   `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной точки приема запроса. Значение по умолчанию - `false`.
+-   `header` — добавляет указанный HTTP-заголовок к запросу на заданную точку приема запроса. Может быть определен несколько раз.
 -   `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C. 

 **Пример**
@ -133,8 +133,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
 ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
 ```

-!!! warning "Warning"
-    Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
+Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.

 4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`:

@ -145,6 +144,3 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-
 **Смотрите также**

 -  [Табличная функция S3](../../../sql-reference/table-functions/s3.md)
-
-[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) <!--hide-->
-
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -753,7 +753,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

 Необязательные параметры:   

-   `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`.
+-   `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из сетевого окружения, а также из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`.
+-   `use_insecure_imds_request` — признак, нужно ли использовать менее безопасное соединение при выполнении запроса к IMDS при получении учётных данных из метаданных Amazon EC2. Значение по умолчанию: `false`.
 -   `proxy` — конфигурация прокси-сервера для конечной точки S3. Каждый элемент `uri` внутри блока `proxy` должен содержать URL прокси-сервера. 
 -   `connect_timeout_ms` — таймаут подключения к сокету в миллисекундах. Значение по умолчанию: 10 секунд. 
 -   `request_timeout_ms` — таймаут выполнения запроса в миллисекундах. Значение по умолчанию: 5 секунд. 
--- a/docs/ru/guides/apply-catboost-model.md
+++ b/docs/ru/guides/apply-catboost-model.md
@ -158,7 +158,9 @@ FROM amazon_train
 <catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
 <models_config>/home/catboost/models/*_model.xml</models_config>
 ```
-
+!!! note "Примечание"
+    Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
+    
 ## 4. Запустите вывод модели из SQL {#run-model-inference}

 Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
--- a/docs/ru/interfaces/third-party/gui.md
+++ b/docs/ru/interfaces/third-party/gui.md
@ -166,4 +166,19 @@ toc_title: "Визуальные интерфейсы от сторонних р

 [Как сконфигурировать ClickHouse в Looker.](https://docs.looker.com/setup-and-management/database-config/clickhouse)

-[Original article](https://clickhouse.tech/docs/ru/interfaces/third-party/gui/) <!--hide-->
+### SeekTable {#seektable}
+
+[SeekTable](https://www.seektable.com) — это аналитический инструмент для самостоятельного анализа и обработки данных бизнес-аналитики. Он доступен как в виде облачного сервиса, так и в виде локальной версии. Отчеты из SeekTable могут быть встроены в любое веб-приложение.
+
+Основные возможности:
+
+-   Удобный конструктор отчетов.
+-   Гибкая настройка отчетов SQL и создание запросов для специфичных отчетов.
+-   Интегрируется с ClickHouse, используя собственную точку приема запроса TCP/IP или интерфейс HTTP(S) (два разных драйвера).
+-   Поддерживает всю мощь диалекта ClickHouse SQL для построения запросов по различным измерениям и показателям.
+-   [WEB-API](https://www.seektable.com/help/web-api-integration) для автоматизированной генерации отчетов.
+-   Процесс разработки отчетов поддерживает [резервное копирование/восстановление данных](https://www.seektable.com/help/self-hosted-backup-restore); конфигурация моделей данных (кубов) / отчетов представляет собой удобочитаемый XML-файл, который может храниться в системе контроля версий.
+
+SeekTable [бесплатен](https://www.seektable.com/help/cloud-pricing) для личного/индивидуального использования.
+
+[Как сконфигурировать подключение ClickHouse в SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)
--- a/docs/ru/operations/system-tables/columns.md
+++ b/docs/ru/operations/system-tables/columns.md
@ -4,7 +4,9 @@

 С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.

-Таблица `system.columns` содержит столбцы (тип столбца указан в скобках):
+Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое. 
+
+Столбцы:

 -   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
@ -23,3 +25,46 @@
 -   `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки.
 -   `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия.

+**Пример**
+
+```sql
+SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
+```
+
+```text
+Row 1:
+──────
+database:                system
+table:                   aggregate_function_combinators
+name:                    name
+type:                    String
+default_kind:            
+default_expression:      
+data_compressed_bytes:   0
+data_uncompressed_bytes: 0
+marks_bytes:             0
+comment:                 
+is_in_partition_key:     0
+is_in_sorting_key:       0
+is_in_primary_key:       0
+is_in_sampling_key:      0
+compression_codec:       
+
+Row 2:
+──────
+database:                system
+table:                   aggregate_function_combinators
+name:                    is_internal
+type:                    UInt8
+default_kind:            
+default_expression:      
+data_compressed_bytes:   0
+data_uncompressed_bytes: 0
+marks_bytes:             0
+comment:                 
+is_in_partition_key:     0
+is_in_sorting_key:       0
+is_in_primary_key:       0
+is_in_sampling_key:      0
+compression_codec:       
+```
--- a/docs/ru/operations/system-tables/tables.md
+++ b/docs/ru/operations/system-tables/tables.md
@ -1,39 +1,94 @@
 # system.tables {#system-tables}

-Содержит метаданные каждой таблицы, о которой знает сервер. Отсоединённые таблицы не отображаются в `system.tables`.
+Содержит метаданные каждой таблицы, о которой знает сервер. 

-Эта таблица содержит следующие столбцы (тип столбца показан в скобках):
+Отсоединённые таблицы ([DETACH](../../sql-reference/statements/detach.md)) не отображаются в `system.tables`.

-   `database String` — имя базы данных, в которой находится таблица.
-   `name` (String) — имя таблицы.
-   `engine` (String) — движок таблицы (без параметров).
-   `is_temporary` (UInt8) — флаг, указывающий на то, временная это таблица или нет.
-   `data_path` (String) — путь к данным таблицы в файловой системе.
-   `metadata_path` (String) — путь к табличным метаданным в файловой системе.
-   `metadata_modification_time` (DateTime) — время последней модификации табличных метаданных.
-   `dependencies_database` (Array(String)) — зависимости базы данных.
-   `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
-   `create_table_query` (String) — запрос, которым создавалась таблица.
-   `engine_full` (String) — параметры табличного движка.
-   `partition_key` (String) — ключ партиционирования таблицы.
-   `sorting_key` (String) — ключ сортировки таблицы.
-   `primary_key` (String) - первичный ключ таблицы.
-   `sampling_key` (String) — ключ сэмплирования таблицы.
-   `storage_policy` (String) - политика хранения данных:
+Информация о [временных таблицах](../../sql-reference/statements/create/table.md#temporary-tables) содержится в `system.tables` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких таблиц пустое, а флаг `is_temporary` включен. 
+
+Столбцы:
+
+-   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица.
+-   `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
+-   `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров).
+-   `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет.
+-   `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе.
+-   `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе.
+-   `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время последней модификации табличных метаданных.
+-   `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — зависимости базы данных.
+-   `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
+-   `create_table_query` ([String](../../sql-reference/data-types/string.md)) — запрос, при помощи которого создавалась таблица.
+-   `engine_full` ([String](../../sql-reference/data-types/string.md)) — параметры табличного движка.
+-   `partition_key` ([String](../../sql-reference/data-types/string.md)) — ключ партиционирования таблицы.
+-   `sorting_key` ([String](../../sql-reference/data-types/string.md)) — ключ сортировки таблицы.
+-   `primary_key` ([String](../../sql-reference/data-types/string.md)) — первичный ключ таблицы.
+-   `sampling_key` ([String](../../sql-reference/data-types/string.md)) — ключ сэмплирования таблицы.
+-   `storage_policy` ([String](../../sql-reference/data-types/string.md)) — политика хранения данных:

    -   [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    -   [Distributed](../../engines/table-engines/special/distributed.md#distributed)

-   `total_rows` (Nullable(UInt64)) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
+-   `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `NULL` (включая базовую таблицу `Buffer`).

-   `total_bytes` (Nullable(UInt64)) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
+-   `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `NULL` (не включает в себя никакого базового хранилища).

    -   Если таблица хранит данные на диске, возвращает используемое пространство на диске (т. е. сжатое).
    -   Если таблица хранит данные в памяти, возвращает приблизительное количество используемых байт в памяти.

-   `lifetime_rows` (Nullable(UInt64)) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
+-   `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).

-   `lifetime_bytes` (Nullable(UInt64)) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
+-   `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).

 Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.

+**Пример**
+
+```sql
+SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
+```
+
+```text
+Row 1:
+──────
+database:                   system
+name:                       aggregate_function_combinators
+uuid:                       00000000-0000-0000-0000-000000000000
+engine:                     SystemAggregateFunctionCombinators
+is_temporary:               0
+data_paths:                 []
+metadata_path:              /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
+metadata_modification_time: 1970-01-01 03:00:00
+dependencies_database:      []
+dependencies_table:         []
+create_table_query:         
+engine_full:                
+partition_key:              
+sorting_key:                
+primary_key:                
+sampling_key:               
+storage_policy:             
+total_rows:                 ᴺᵁᴸᴸ
+total_bytes:                ᴺᵁᴸᴸ
+
+Row 2:
+──────
+database:                   system
+name:                       asynchronous_metrics
+uuid:                       00000000-0000-0000-0000-000000000000
+engine:                     SystemAsynchronousMetrics
+is_temporary:               0
+data_paths:                 []
+metadata_path:              /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
+metadata_modification_time: 1970-01-01 03:00:00
+dependencies_database:      []
+dependencies_table:         []
+create_table_query:         
+engine_full:                
+partition_key:              
+sorting_key:                
+primary_key:                
+sampling_key:               
+storage_policy:             
+total_rows:                 ᴺᵁᴸᴸ
+total_bytes:                ᴺᵁᴸᴸ
+```
--- a/docs/ru/operations/update.md
+++ b/docs/ru/operations/update.md
@ -29,5 +29,3 @@ $ sudo apt-get update
 $ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
 $ sudo service clickhouse-server restart
 ```
-
-[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/update/) <!--hide-->
--- a/docs/ru/sql-reference/aggregate-functions/combinators.md
+++ b/docs/ru/sql-reference/aggregate-functions/combinators.md
@ -27,6 +27,40 @@ toc_title: "Комбинаторы агрегатных функций"

 Комбинаторы -If и -Array можно сочетать. При этом, должен сначала идти Array, а потом If. Примеры: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Из-за такого порядка получается, что аргумент cond не должен быть массивом.

+## -SimpleState {#agg-functions-combinator-simplestate}
+
+При использовании этого комбинатора агрегатная функция возвращает то же значение, но типа [SimpleAggregateFunction(...)](../../sql-reference/data-types/simpleaggregatefunction.md). Текущее значение функции может храниться в таблице для последующей работы с таблицами семейства [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md).
+
+**Синтаксис**
+
+``` sql
+<aggFunction>SimpleState(x)
+```
+
+**Аргументы**
+
+-   `x` — параметры агрегатной функции.
+
+**Возвращаемое значение**
+
+Значение агрегатной функции типа `SimpleAggregateFunction(...)`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
+```
+
+Результат:
+
+``` text
+┌─toTypeName(c)────────────────────────┬─c─┐
+│ SimpleAggregateFunction(any, UInt64) │ 0 │
+└──────────────────────────────────────┴───┘
+```
+
 ## -State {#state}

 В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции [uniq](reference/uniq.md#agg_function-uniq) — количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции `uniq` — хэш-таблицу для расчёта количества уникальных значений), которое имеет тип `AggregateFunction(...)` и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации.
@ -247,4 +281,3 @@ FROM people
 │ [3,2]  │ [11.5,12.949999809265137] │
 └────────┴───────────────────────────┘
 ```
-
--- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md
+++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md
@ -3,6 +3,8 @@
 Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк,
 а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется.

+Чтобы получить промежуточное значение, обычно используются агрегатные функции с суффиксом [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate).
+
 Поддерживаются следующие агрегатные функции:

 -   [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
--- a/docs/ru/sql-reference/functions/json-functions.md
+++ b/docs/ru/sql-reference/functions/json-functions.md
@ -16,51 +16,65 @@ toc_title: JSON

 ## visitParamHas(params, name) {#visitparamhasparams-name}

-Проверить наличие поля с именем name.
+Проверяет наличие поля с именем `name`.
+
+Алиас: `simpleJSONHas`.

 ## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}

-Распарсить UInt64 из значения поля с именем name. Если поле строковое - попытаться распарсить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то вернуть 0.
+Пытается выделить число типа UInt64 из значения поля с именем `name`. Если поле строковое, пытается выделить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то возвращает 0.
+
+Алиас: `simpleJSONExtractUInt`.

 ## visitParamExtractInt(params, name) {#visitparamextractintparams-name}

 Аналогично для Int64.

+Алиас: `simpleJSONExtractInt`.
+
 ## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}

 Аналогично для Float64.

+Алиас: `simpleJSONExtractFloat`.
+
 ## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}

-Распарсить значение true/false. Результат - UInt8.
+Пытается выделить значение true/false. Результат — UInt8.
+
+Алиас: `simpleJSONExtractBool`.

 ## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}

-Вернуть значение поля, включая разделители.
+Возвращает значение поля, включая разделители.
+
+Алиас: `simpleJSONExtractRaw`.

 Примеры:

 ``` sql
-visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
-visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
+visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
+visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
 ```

 ## visitParamExtractString(params, name) {#visitparamextractstringparams-name}

-Распарсить строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
+Разбирает строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
+
+Алиас: `simpleJSONExtractString`.

 Примеры:

 ``` sql
-visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
-visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
-visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
-visitParamExtractString('{"abc":"hello}', 'abc') = ''
+visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
+visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
+visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
+visitParamExtractString('{"abc":"hello}', 'abc') = '';
 ```

-На данный момент, не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
+На данный момент не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).

-Следующие функции используют [simdjson](https://github.com/lemire/simdjson) который разработан под более сложные требования для разбора JSON. Упомянутое выше предположение 2 по-прежнему применимо.
+Следующие функции используют [simdjson](https://github.com/lemire/simdjson), который разработан под более сложные требования для разбора JSON. Упомянутое выше допущение 2 по-прежнему применимо.

 ## isValidJSON(json) {#isvalidjsonjson}

@ -292,4 +306,3 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
 │ [('d','"hello"'),('f','"world"')]                                                                     │
 └───────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ```
-
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@ -63,6 +63,9 @@ DROP COLUMN [IF EXISTS] name

 Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно.

+!!! warning "Предупреждение"
+    Вы не можете удалить столбец, используемый в [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка.
+
 Пример:

 ``` sql
@ -155,7 +158,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
 ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 ```

-## Смотрите также
+**Смотрите также**

 - [REMOVE TTL](ttl.md).

--- a/docs/ru/sql-reference/statements/create/table.md
+++ b/docs/ru/sql-reference/statements/create/table.md
@ -46,15 +46,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
 ### Из запроса SELECT {#from-select-query}

 ``` sql
-CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
+CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
 ```

-Создаёт таблицу со структурой, как результат запроса `SELECT`, с движком engine, и заполняет её данными из SELECT-а.
+Создаёт таблицу со структурой, как результат запроса `SELECT`, с движком `engine`, и заполняет её данными из `SELECT`. Также вы можете явно задать описание столбцов.

-Во всех случаях, если указано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если таблица уже существует. В этом случае, запрос будет ничего не делать.
+Если таблица уже существует и указано `IF NOT EXISTS`, то запрос ничего не делает.

 После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../../engines/table-engines/index.md#table_engines).

+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
+SELECT x, toTypeName(x) FROM t1;
+```
+
+Результат:
+
+```text
+┌─x─┬─toTypeName(x)─┐
+│ 1 │ String        │
+└───┴───────────────┘
+```
+
 ## Модификатор NULL или NOT NULL {#null-modifiers}

 Модификатор `NULL` или `NOT NULL`, указанный после типа данных в определении столбца, позволяет или не позволяет типу данных быть [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). 
@ -230,7 +247,7 @@ CREATE TABLE codec_example
 )
 ENGINE = MergeTree()
 ```
-## Временные таблицы {#vremennye-tablitsy}
+## Временные таблицы {#temporary-tables}

 ClickHouse поддерживает временные таблицы со следующими характеристиками:

--- a/docs/ru/sql-reference/statements/optimize.md
+++ b/docs/ru/sql-reference/statements/optimize.md
@ -5,19 +5,83 @@ toc_title: OPTIMIZE

 # OPTIMIZE {#misc_operations-optimize}

-``` sql
-OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE]
-```
-
-Запрос пытается запустить внеплановый мёрж кусков данных для таблиц семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). Другие движки таблиц не поддерживаются.
-
-Если `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md), ClickHouse создаёт задачу на мёрж и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`).
-
-   Если `OPTIMIZE` не выполняет мёрж по любой причине, ClickHouse не оповещает об этом клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
-   Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr).
-   Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
-   Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех колонках), имеет смысл только для движка MergeTree.
+Запрос пытается запустить внеплановое слияние кусков данных для таблиц.

 !!! warning "Внимание"
-    Запрос `OPTIMIZE` не может устранить причину появления ошибки «Too many parts».
-    
+    `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`.
+
+**Синтаксис**
+
+``` sql
+OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+```
+
+Может применяться к таблицам семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md), [MaterializedView](../../engines/table-engines/special/materializedview.md) и [Buffer](../../engines/table-engines/special/buffer.md). Другие движки таблиц не поддерживаются.
+
+Если запрос `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md), ClickHouse создаёт задачу на слияние и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`).
+
+-   По умолчанию, если запросу `OPTIMIZE` не удалось выполнить слияние, то
+ClickHouse не оповещает клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
+-   Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr).
+-   Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
+-   Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех столбцах), имеет смысл только для движка MergeTree.
+
+## Выражение BY {#by-expression}
+
+Чтобы выполнить дедупликацию по произвольному набору столбцов, вы можете явно указать список столбцов или использовать любую комбинацию подстановки [`*`](../../sql-reference/statements/select/index.md#asterisk), выражений [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) и [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier).
+
+Список столбцов для дедупликации должен включать все столбцы, указанные в условиях сортировки (первичный ключ и ключ сортировки), а также в условиях партиционирования (ключ партиционирования).
+
+!!! note "Примечание"
+    Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы `MATERIALIZED` и `ALIAS` не включаются в результат.
+    Если указать пустой список или выражение, которое возвращает пустой список, или дедуплицировать столбец по псевдониму (`ALIAS`), то сервер вернет ошибку.
+
+
+**Примеры**
+
+Рассмотрим таблицу:
+
+``` sql
+CREATE TABLE example (
+    primary_key Int32,
+    secondary_key Int32,
+    value UInt32,
+    partition_key UInt32,
+    materialized_value UInt32 MATERIALIZED 12345,
+    aliased_value UInt32 ALIAS 2,
+    PRIMARY KEY primary_key
+) ENGINE=MergeTree
+PARTITION BY partition_key;
+```
+
+Прежний способ дедупликации, когда учитываются все столбцы. Строка удаляется только в том случае, если все значения во всех столбцах равны соответствующим значениям в предыдущей строке.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE;
+```
+
+Дедупликация по всем столбцам, кроме `ALIAS` и `MATERIALIZED`: `primary_key`, `secondary_key`, `value` и `partition_key`.
+
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
+```
+
+Дедупликация по всем столбцам, кроме `ALIAS`, `MATERIALIZED` и `materialized_value`: столбцы `primary_key`, `secondary_key`, `value` и `partition_key`.
+
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
+```
+
+Дедупликация по столбцам `primary_key`, `secondary_key` и `partition_key`.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
+```
+
+Дедупликация по всем столбцам, соответствующим регулярному выражению: `primary_key`, `secondary_key` и `partition_key`.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
+```
--- a/docs/ru/sql-reference/table-functions/s3.md
+++ b/docs/ru/sql-reference/table-functions/s3.md
@ -18,7 +18,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
 -   `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. Подробнее смотри [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 -   `format` — [формат](../../interfaces/formats.md#formats) файла.
 -   `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
-   `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. 
+-   `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Необязательный параметр. 

 **Возвращаемые значения**

--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -33,8 +33,12 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
    ${ENABLE_CLICKHOUSE_ALL})

 # https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/
-option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
-    ${ENABLE_CLICKHOUSE_ALL})
+if (ENABLE_ODBC) # With ODBC enabled, the bridge option defaults to ENABLE_CLICKHOUSE_ALL.
+    option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
+        ${ENABLE_CLICKHOUSE_ALL})
+else () # Without ODBC, the bridge option defaults to OFF regardless of ENABLE_CLICKHOUSE_ALL.
+    option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver" OFF)
+endif ()

 option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source"
    ${ENABLE_CLICKHOUSE_ALL})
--- a/programs/server/.gitignore
+++ b/programs/server/.gitignore
@ -1,8 +1,11 @@
-/access
-/dictionaries_lib
-/flags
-/format_schemas
+/metadata
 /metadata_dropped
+/data
+/store
+/access
+/flags
+/dictionaries_lib
+/format_schemas
 /preprocessed_configs
 /shadow
 /tmp
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@ -19,6 +19,7 @@ set (CLICKHOUSE_SERVER_LINK
        clickhouse_storages_system
        clickhouse_table_functions
        string_utils
+        jemalloc

    ${LINK_RESOURCE_LIB}

--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -101,6 +101,10 @@
 #   include <Server/KeeperTCPHandlerFactory.h>
 #endif

+#if USE_JEMALLOC
+#    include <jemalloc/jemalloc.h>
+#endif
+
 namespace CurrentMetrics
 {
    extern const Metric Revision;
@ -109,11 +113,35 @@ namespace CurrentMetrics
    extern const Metric MaxDDLEntryID;
 }

+#if USE_JEMALLOC
+static bool jemallocOptionEnabled(const char *name) /// Queries the boolean jemalloc option `name` through mallctl() and returns its value.
+{
+    bool value; /// Output buffer for mallctl(); it is returned only if the call below does not fail.
+    size_t size = sizeof(value); /// Size of `value`, passed to mallctl() by pointer.
+
+    if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) /// Read-only query: no new value is supplied.
+        throw Poco::SystemException("mallctl() failed"); /// A non-zero mallctl() result is turned into an exception.
+
+    return value;
+}
+#else
+static bool jemallocOptionEnabled(const char *) { return 0; } /// When USE_JEMALLOC is not set, every option is reported as disabled.
+#endif
+

 int mainEntryClickHouseServer(int argc, char ** argv)
 {
    DB::Server app;

+    if (jemallocOptionEnabled("opt.background_thread"))
+    {
+        LOG_ERROR(&app.logger(),
+            "jemalloc.background_thread was requested, "
+            "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
+            "and also background_thread is not compatible with ClickHouse watchdog "
+            "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
+    }
+
    /// Do not fork separate process from watchdog if we attached to terminal.
    /// Otherwise it breaks gdb usage.
    /// Can be overridden by environment variable (cannot use server config at this moment).
--- a/programs/server/data/.gitignore
+++ b/programs/server/data/.gitignore
@ -1,3 +0,0 @@
-*.txt
-*.dat
-*.idx
--- a/programs/server/metadata/.gitignore
+++ b/programs/server/metadata/.gitignore
@ -1 +0,0 @@
-*.sql
--- a/src/AggregateFunctions/AggregateFunctionAvg.h
+++ b/src/AggregateFunctions/AggregateFunctionAvg.h
@ -96,7 +96,7 @@ public:
        UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
        : Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}

-    DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
+    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<Float64>>(); } /// Returns Float64. NOTE(review): `final` was relaxed to `override`, presumably so AggregateFunctionSumCount can return a tuple — confirm.

    bool allocatesMemoryInArena() const override { return false; }

--- a/src/AggregateFunctions/AggregateFunctionSumCount.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSumCount.cpp
@ -0,0 +1,49 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionSumCount.h>
+#include <AggregateFunctions/Helpers.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include "registerAggregateFunctions.h"
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+bool allowType(const DataTypePtr& type) noexcept /// Returns true if `type` is a signed/unsigned integer, float or decimal; the sumCount factory rejects any other argument type.
+{
+    const WhichDataType t(type);
+    return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
+}
+
+AggregateFunctionPtr createAggregateFunctionSumCount(const std::string & name, const DataTypes & argument_types, const Array & parameters) /// Factory callback registered as "sumCount": builds the function for the given argument type.
+{
+    assertNoParameters(name, parameters); /// Presumably rejects any function parameters (inferred from the helper name — confirm).
+    assertUnary(name, argument_types); /// Presumably requires exactly one argument (inferred from the helper name — confirm).
+
+    AggregateFunctionPtr res;
+    DataTypePtr data_type = argument_types[0];
+    if (!allowType(data_type)) /// Only integer, float and decimal arguments are accepted.
+        throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    if (isDecimal(data_type)) /// Decimal arguments additionally pass their scale to the constructor.
+        res.reset(createWithDecimalType<AggregateFunctionSumCount>(
+            *data_type, argument_types, getDecimalScale(*data_type)));
+    else
+        res.reset(createWithNumericType<AggregateFunctionSumCount>(*data_type, argument_types));
+
+    return res;
+}
+
+}
+
+void registerAggregateFunctionSumCount(AggregateFunctionFactory & factory) /// Registers createAggregateFunctionSumCount in `factory` under the name "sumCount".
+{
+    factory.registerFunction("sumCount", createAggregateFunctionSumCount);
+}
+
+}
--- a/src/AggregateFunctions/AggregateFunctionSumCount.h
+++ b/src/AggregateFunctions/AggregateFunctionSumCount.h
@ -0,0 +1,55 @@
+#pragma once
+
+#include <type_traits>
+#include <DataTypes/DataTypeTuple.h>
+#include <AggregateFunctions/AggregateFunctionAvg.h>
+
+
+namespace DB
+{
+template <typename T>
+using DecimalOrNumberDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<AvgFieldType<T>>, DataTypeNumber<AvgFieldType<T>>>; /// Data type of the sum: DataTypeDecimal for decimal T, DataTypeNumber otherwise, both over AvgFieldType<T>.
+template <typename T>
+class AggregateFunctionSumCount final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>> /// Aggregate function "sumCount": keeps a running sum (numerator) and row count (denominator) and returns both as a tuple.
+{
+public:
+    using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>;
+
+    AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0)
+         : Base(argument_types_, num_scale_), scale(num_scale_) {} /// num_scale_ is forwarded to the base and also stored as `scale` for getReturnType().
+
+    DataTypePtr getReturnType() const override /// Returns Tuple(sum type, UInt64).
+    {
+        DataTypes types;
+        if constexpr (IsDecimalNumber<T>) /// Decimal sum: maximum precision of the decimal type with the stored scale.
+            types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>(DecimalOrNumberDataType<T>::maxPrecision(), scale));
+        else
+            types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>());
+
+        types.emplace_back(std::make_shared<DataTypeUInt64>()); /// Second tuple element: the count.
+
+        return std::make_shared<DataTypeTuple>(types);
+    }
+
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final /// Appends the numerator to tuple column 0 and the denominator to tuple column 1 of `to`.
+    {
+        assert_cast<DecimalOrVectorCol<AvgFieldType<T>> &>((assert_cast<ColumnTuple &>(to)).getColumn(0)).getData().push_back(
+            this->data(place).numerator);
+
+        assert_cast<ColumnUInt64 &>((assert_cast<ColumnTuple &>(to)).getColumn(1)).getData().push_back(
+            this->data(place).denominator);
+    }
+
+    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final /// Adds row `row_num` of columns[0] to the numerator and increments the denominator. NOTE(review): NO_SANITIZE_UNDEFINED presumably tolerates sum overflow — confirm.
+    {
+        this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
+        ++this->data(place).denominator;
+    }
+
+    String getName() const final { return "sumCount"; }
+
+private:
+    UInt32 scale; /// Scale passed to the constructor; used only when building the decimal return type.
+};
+
+}
--- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
+++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
@ -17,7 +17,7 @@
 #include <IO/WriteHelpers.h>


-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
@ -280,7 +280,7 @@ public:

 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h
+++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h
@ -163,7 +163,7 @@ public:
        sorted = false;
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wclass-memaccess"
 #endif
@ -191,7 +191,7 @@ public:
        }
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@ -25,6 +25,7 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory &);
 void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
 void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
 void registerAggregateFunctionSum(AggregateFunctionFactory &);
+void registerAggregateFunctionSumCount(AggregateFunctionFactory &);
 void registerAggregateFunctionSumMap(AggregateFunctionFactory &);
 void registerAggregateFunctionsUniq(AggregateFunctionFactory &);
 void registerAggregateFunctionUniqCombined(AggregateFunctionFactory &);
@ -83,6 +84,7 @@ void registerAggregateFunctions()
        registerAggregateFunctionsStatisticsStable(factory);
        registerAggregateFunctionsStatisticsSimple(factory);
        registerAggregateFunctionSum(factory);
+        registerAggregateFunctionSumCount(factory);
        registerAggregateFunctionSumMap(factory);
        registerAggregateFunctionsUniq(factory);
        registerAggregateFunctionUniqCombined(factory);
--- a/src/AggregateFunctions/ya.make
+++ b/src/AggregateFunctions/ya.make
@ -50,6 +50,7 @@ SRCS(
    AggregateFunctionStatisticsSimple.cpp
    AggregateFunctionStudentTTest.cpp
    AggregateFunctionSum.cpp
+    AggregateFunctionSumCount.cpp
    AggregateFunctionSumMap.cpp
    AggregateFunctionTopK.cpp
    AggregateFunctionUniq.cpp
--- a/src/Client/MultiplexedConnections.cpp
+++ b/src/Client/MultiplexedConnections.cpp
@ -13,6 +13,7 @@ namespace ErrorCodes
    extern const int MISMATCH_REPLICAS_DATA_SOURCES;
    extern const int NO_AVAILABLE_REPLICA;
    extern const int TIMEOUT_EXCEEDED;
+    extern const int UNKNOWN_PACKET_FROM_SERVER;
 }


@ -278,7 +279,22 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
    Packet packet;
    {
        AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
-        packet = current_connection->receivePacket();
+
+        try
+        {
+            packet = current_connection->receivePacket();
+        }
+        catch (Exception & e)
+        {
+            if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
+            {
+                /// Exception may happen when packet is received, e.g. when got unknown packet.
+                /// In this case, invalidate replica, so that we would not read from it anymore.
+                current_connection->disconnect();
+                invalidateReplica(state);
+            }
+            throw;
+        }
    }

    switch (packet.type)
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@ -111,7 +111,7 @@ public:
    }

 /// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -128,7 +128,7 @@ public:
        offsets.push_back(new_size);
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@ -277,7 +277,7 @@ private:
  *  GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfree-nonheap-object"
 #endif
@ -359,6 +359,6 @@ extern template class Allocator<true, false>;
 extern template class Allocator<false, true>;
 extern template class Allocator<true, true>;

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
--- a/src/Common/UInt128.h
+++ b/src/Common/UInt128.h
@ -19,7 +19,7 @@ namespace DB
 struct UInt128
 {
 /// Suppress gcc7 warnings: 'prev_key.DB::UInt128::low' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -92,7 +92,7 @@ struct UInt128
            return static_cast<T>(low);
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

@ -150,7 +150,7 @@ struct DummyUInt256
 {

 /// Suppress gcc7 warnings: 'prev_key.DB::UInt256::a' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -179,7 +179,7 @@ struct DummyUInt256
    bool operator== (const UInt64 rhs) const { return a == rhs && b == 0 && c == 0 && d == 0; }
    bool operator!= (const UInt64 rhs) const { return !operator==(rhs); }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Common/ZooKeeper/IKeeper.h
+++ b/src/Common/ZooKeeper/IKeeper.h
@ -116,6 +116,7 @@ struct Request
    virtual ~Request() = default;
    virtual String getPath() const = 0;
    virtual void addRootPath(const String & /* root_path */) {}
+    virtual size_t bytesSize() const { return 0; }
 };

 struct Response;
@ -131,6 +132,7 @@ struct Response
    Response & operator=(const Response &) = default;
    virtual ~Response() = default;
    virtual void removeRootPath(const String & /* root_path */) {}
+    virtual size_t bytesSize() const { return 0; }
 };

 struct WatchResponse : virtual Response
@ -140,6 +142,8 @@ struct WatchResponse : virtual Response
    String path;

    void removeRootPath(const String & root_path) override;
+
+    size_t bytesSize() const override { return path.size() + sizeof(type) + sizeof(state); }
 };

 using WatchCallback = std::function<void(const WatchResponse &)>;
@ -154,6 +158,9 @@ struct CreateRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + data.size()
+            + sizeof(is_ephemeral) + sizeof(is_sequential) + acls.size() * sizeof(ACL); }
 };

 struct CreateResponse : virtual Response
@ -161,6 +168,8 @@ struct CreateResponse : virtual Response
    String path_created;

    void removeRootPath(const String & root_path) override;
+
+    size_t bytesSize() const override { return path_created.size(); }
 };

 struct RemoveRequest : virtual Request
@ -170,6 +179,8 @@ struct RemoveRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + sizeof(version); }
 };

 struct RemoveResponse : virtual Response
@ -182,11 +193,15 @@ struct ExistsRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ExistsResponse : virtual Response
 {
    Stat stat;
+
+    size_t bytesSize() const override { return sizeof(Stat); }
 };

 struct GetRequest : virtual Request
@ -195,12 +210,16 @@ struct GetRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct GetResponse : virtual Response
 {
    String data;
    Stat stat;
+
+    size_t bytesSize() const override { return data.size() + sizeof(stat); }
 };

 struct SetRequest : virtual Request
@ -211,11 +230,15 @@ struct SetRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    /// Approximate size of the request: the node path, the new data and the expected version.
+    size_t bytesSize() const override { return path.size() + data.size() + sizeof(version); }
 };

 struct SetResponse : virtual Response
 {
    Stat stat;
+
+    size_t bytesSize() const override { return sizeof(stat); }
 };

 struct ListRequest : virtual Request
@ -224,12 +247,22 @@ struct ListRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ListResponse : virtual Response
 {
    std::vector<String> names;
    Stat stat;
+
+    /// Size of the stat structure plus the lengths of all returned child names.
+    size_t bytesSize() const override
+    {
+        size_t total = sizeof(stat);
+        for (const auto & child_name : names)
+            total += child_name.size();
+        return total;
+    }
 };

 struct CheckRequest : virtual Request
@ -239,6 +272,8 @@ struct CheckRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + sizeof(version); }
 };

 struct CheckResponse : virtual Response
@ -251,6 +286,14 @@ struct MultiRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return {}; }
+
+    /// Sum of bytesSize() over all sub-requests.
+    size_t bytesSize() const override
+    {
+        size_t total = 0;
+        for (const auto & sub_request : requests)
+            total += sub_request->bytesSize();
+        return total;
+    }
 };

 struct MultiResponse : virtual Response
@ -258,6 +301,14 @@ struct MultiResponse : virtual Response
    Responses responses;

    void removeRootPath(const String & root_path) override;
+
+    /// Sum of bytesSize() over all sub-responses.
+    size_t bytesSize() const override
+    {
+        size_t total = 0;
+        for (const auto & sub_response : responses)
+            total += sub_response->bytesSize();
+        return total;
+    }
 };

 /// This response may be received only as an element of responses in MultiResponse.
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@ -455,6 +455,39 @@ ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return std::m
 ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return std::make_shared<ZooKeeperMultiResponse>(requests); }
 ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }

+/// Serializes the request body: internal_id, then session_timeout_ms, then server_id.
+/// readImpl() reads the fields back in the same order.
+void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(internal_id, out);
+    Coordination::write(session_timeout_ms, out);
+    Coordination::write(server_id, out);
+}
+
+/// Deserializes the request body: internal_id, then session_timeout_ms, then server_id,
+/// i.e. the same order in which writeImpl() emits them.
+void ZooKeeperSessionIDRequest::readImpl(ReadBuffer & in)
+{
+    Coordination::read(internal_id, in);
+    Coordination::read(session_timeout_ms, in);
+    Coordination::read(server_id, in);
+}
+
+/// Returns a newly created, default-constructed ZooKeeperSessionIDResponse.
+Coordination::ZooKeeperResponsePtr ZooKeeperSessionIDRequest::makeResponse() const
+{
+    Coordination::ZooKeeperResponsePtr response = std::make_shared<ZooKeeperSessionIDResponse>();
+    return response;
+}
+
+/// Deserializes the response body: internal_id, then session_id, then server_id,
+/// i.e. the same order in which writeImpl() emits them.
+void ZooKeeperSessionIDResponse::readImpl(ReadBuffer & in)
+{
+    Coordination::read(internal_id, in);
+    Coordination::read(session_id, in);
+    Coordination::read(server_id, in);
+}
+
+/// Serializes the response body: internal_id, then session_id, then server_id.
+/// readImpl() reads the fields back in the same order.
+void ZooKeeperSessionIDResponse::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(internal_id, out);
+    Coordination::write(session_id, out);
+    Coordination::write(server_id, out);
+}
+
 void ZooKeeperRequestFactory::registerRequest(OpNum op_num, Creator creator)
 {
    if (!op_num_to_request.try_emplace(op_num, creator).second)
@ -511,6 +544,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
    registerZooKeeperRequest<OpNum::List, ZooKeeperListRequest>(*this);
    registerZooKeeperRequest<OpNum::Check, ZooKeeperCheckRequest>(*this);
    registerZooKeeperRequest<OpNum::Multi, ZooKeeperMultiRequest>(*this);
+    registerZooKeeperRequest<OpNum::SessionID, ZooKeeperSessionIDRequest>(*this);
 }

 }
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@ -84,6 +84,8 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return ZooKeeperRequest::bytesSize() + path.size(); }
 };

 struct ZooKeeperSyncResponse final : ZooKeeperResponse
@ -92,6 +94,8 @@ struct ZooKeeperSyncResponse final : ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Sync; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
@ -128,6 +132,9 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return ZooKeeperRequest::bytesSize() + sizeof(xid) +
+            sizeof(type) + scheme.size() + data.size(); }
 };

 struct ZooKeeperAuthResponse final : ZooKeeperResponse
@ -136,6 +143,8 @@ struct ZooKeeperAuthResponse final : ZooKeeperResponse
    void writeImpl(WriteBuffer &) const override {}

    OpNum getOpNum() const override { return OpNum::Auth; }
+
+    size_t bytesSize() const override { return ZooKeeperResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperCloseRequest final : ZooKeeperRequest
@ -172,6 +181,8 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return CreateRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
@ -181,6 +192,8 @@ struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
    void writeImpl(WriteBuffer & out) const override;

    OpNum getOpNum() const override { return OpNum::Create; }
+
+    size_t bytesSize() const override { return CreateResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
@ -194,6 +207,8 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return RemoveRequest::bytesSize() + sizeof(xid); }
 };

 struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
@ -201,6 +216,8 @@ struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
    void readImpl(ReadBuffer &) override {}
    void writeImpl(WriteBuffer &) const override {}
    OpNum getOpNum() const override { return OpNum::Remove; }
+
+    size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
@ -211,6 +228,8 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return ExistsRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
@ -218,6 +237,8 @@ struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Exists; }
+
+    size_t bytesSize() const override { return ExistsResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
@ -228,6 +249,8 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return GetRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
@ -235,6 +258,8 @@ struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Get; }
+
+    size_t bytesSize() const override { return GetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
@ -247,6 +272,8 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return SetRequest::bytesSize() + sizeof(xid); }
 };

 struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
@ -254,6 +281,8 @@ struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Set; }
+
+    size_t bytesSize() const override { return SetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
@ -263,6 +292,8 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return ListRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperSimpleListRequest final : ZooKeeperListRequest
@ -275,6 +306,8 @@ struct ZooKeeperListResponse : ListResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::List; }
+
+    size_t bytesSize() const override { return ListResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperSimpleListResponse final : ZooKeeperListResponse
@ -293,6 +326,8 @@ struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return CheckRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
@ -300,6 +335,8 @@ struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
    void readImpl(ReadBuffer &) override {}
    void writeImpl(WriteBuffer &) const override {}
    OpNum getOpNum() const override { return OpNum::Check; }
+
+    size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 /// This response may be received only as an element of responses in MultiResponse.
@ -309,6 +346,8 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
    void writeImpl(WriteBuffer & out) const override;

    OpNum getOpNum() const override { return OpNum::Error; }
+
+    size_t bytesSize() const override { return ErrorResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
@ -323,6 +362,8 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override;
+
+    size_t bytesSize() const override { return MultiRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
@ -346,6 +387,41 @@ struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse

    void writeImpl(WriteBuffer & out) const override;

+    size_t bytesSize() const override { return MultiResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
+};
+
+/// Fake internal coordination (keeper) request. Never received from client
+/// and never sent to client.
+struct ZooKeeperSessionIDRequest final : ZooKeeperRequest
+{
+    /// Fields are zero-initialized so that a freshly constructed request never
+    /// serializes indeterminate values.
+    int64_t internal_id = 0;
+    int64_t session_timeout_ms = 0;
+    /// Who requested this session
+    int32_t server_id = 0;
+
+    Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
+    /// Always returns an empty path.
+    String getPath() const override { return {}; }
+    /// Serialize / deserialize internal_id, session_timeout_ms and server_id.
+    void writeImpl(WriteBuffer & out) const override;
+    void readImpl(ReadBuffer & in) override;
+
+    /// Creates a ZooKeeperSessionIDResponse.
+    Coordination::ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
+};
+
+/// Fake internal coordination (keeper) response. Never received from client
+/// and never sent to client.
+struct ZooKeeperSessionIDResponse final : ZooKeeperResponse
+{
+    /// Fields are zero-initialized so that a freshly constructed response never
+    /// serializes indeterminate values.
+    int64_t internal_id = 0;
+    int64_t session_id = 0;
+    /// Who requested this session
+    int32_t server_id = 0;
+
+    /// Deserialize internal_id, session_id and server_id.
+    void readImpl(ReadBuffer & in) override;
+
+    /// Serialize internal_id, session_id and server_id.
+    void writeImpl(WriteBuffer & out) const override;
+
+    Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
+};

 class ZooKeeperRequestFactory final : private boost::noncopyable
--- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
@ -21,6 +21,7 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
    static_cast<int32_t>(OpNum::Check),
    static_cast<int32_t>(OpNum::Multi),
    static_cast<int32_t>(OpNum::Auth),
+    static_cast<int32_t>(OpNum::SessionID),
 };

 std::string toString(OpNum op_num)
@ -55,6 +56,8 @@ std::string toString(OpNum op_num)
            return "Heartbeat";
        case OpNum::Auth:
            return "Auth";
+        case OpNum::SessionID:
+            return "SessionID";
    }
    int32_t raw_op = static_cast<int32_t>(op_num);
    throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED);
--- a/src/Common/ZooKeeper/ZooKeeperConstants.h
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.h
@ -30,6 +30,7 @@ enum class OpNum : int32_t
    Check = 13,
    Multi = 14,
    Auth = 100,
+    SessionID = 997, /// Special internal request
 };

 std::string toString(OpNum op_num);
--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@ -1012,6 +1012,16 @@ void ZooKeeper::pushRequest(RequestInfo && info)
    ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
 }

+/// Submits an already constructed request: wraps it together with the response callback
+/// into a RequestInfo and hands it to pushRequest().
+void ZooKeeper::executeGenericRequest(
+    const ZooKeeperRequestPtr & request,
+    ResponseCallback callback)
+{
+    RequestInfo request_info;
+    request_info.request = request;
+    /// `callback` is taken by value, so move it into place instead of copying it a second time.
+    request_info.callback = std::move(callback);
+
+    pushRequest(std::move(request_info));
+}

 void ZooKeeper::create(
    const String & path,
--- a/src/Common/ZooKeeper/ZooKeeperImpl.h
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.h
@ -121,6 +121,9 @@ public:
    /// Useful to check owner of ephemeral node.
    int64_t getSessionID() const override { return session_id; }

+    /// Wraps an already constructed request and its response callback into a RequestInfo
+    /// and hands it to pushRequest().
+    void executeGenericRequest(
+        const ZooKeeperRequestPtr & request,
+        ResponseCallback callback);

    /// See the documentation about semantics of these methods in IKeeper class.

--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@ -181,7 +181,7 @@ std::vector<std::pair<String, uint16_t>> parseRemoteDescriptionForExternalDataba
        size_t colon = address.find(':');
        if (colon == String::npos)
        {
-            LOG_WARNING(&Poco::Logger::get("ParseRemoteDescription"), "Port is not found for host: {}. Using default port {}", default_port);
+            LOG_WARNING(&Poco::Logger::get("ParseRemoteDescription"), "Port is not found for host: {}. Using default port {}", address, default_port);
            result.emplace_back(std::make_pair(address, default_port));
        }
        else
--- a/src/Common/tests/compact_array.cpp
+++ b/src/Common/tests/compact_array.cpp
@ -1,5 +1,5 @@
 /// Bug in GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
@ -263,6 +263,6 @@ int main()
    return 0;
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
--- a/src/Common/tests/parallel_aggregation.cpp
+++ b/src/Common/tests/parallel_aggregation.cpp
@ -69,7 +69,7 @@ static void aggregate1(Map & map, Source::const_iterator begin, Source::const_it
        ++map[*it];
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -122,7 +122,7 @@ static void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source:
    }
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Common/tests/parallel_aggregation2.cpp
+++ b/src/Common/tests/parallel_aggregation2.cpp
@ -62,7 +62,7 @@ struct AggregateIndependent
    }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -115,7 +115,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
    }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

@ -265,7 +265,7 @@ struct Creator
    void operator()(Value &) const {}
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -275,7 +275,7 @@ struct Updater
    void operator()(Value & x) const { ++x; }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@ -80,7 +80,7 @@ public:
    {}


-    off_t appendRecord(ChangelogRecord && record, bool sync)
+    off_t appendRecord(ChangelogRecord && record)
    {
        off_t result = plain_buf.count();
        writeIntBinary(computeRecordChecksum(record), plain_buf);
@ -96,23 +96,21 @@ public:

        entries_written++;

-        if (sync)
-            plain_buf.sync();
-        else
-            plain_buf.next();
        return result;
    }

    void truncateToLength(off_t new_length)
    {
-        flush();
+        plain_buf.next();
        plain_buf.truncate(new_length);
        plain_buf.seek(new_length, SEEK_SET);
    }

-    void flush()
+    void flush(bool force_fsync)
    {
-        plain_buf.sync();
+        plain_buf.next();
+        if (force_fsync)
+            plain_buf.sync();
    }

    uint64_t getEntriesWritten() const
@ -247,9 +245,14 @@ private:
    ReadBufferFromFile read_buf;
 };

-Changelog::Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, Poco::Logger * log_)
+Changelog::Changelog(
+    const std::string & changelogs_dir_,
+    uint64_t rotate_interval_,
+    bool force_sync_,
+    Poco::Logger * log_)
    : changelogs_dir(changelogs_dir_)
    , rotate_interval(rotate_interval_)
+    , force_sync(force_sync_)
    , log(log_)
 {
    namespace fs = std::filesystem;
@ -357,6 +360,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin

 void Changelog::rotate(uint64_t new_start_log_index)
 {
+    /// Flush previous log
+    flush();
+
    ChangelogFileDescription new_description;
    new_description.prefix = DEFAULT_PREFIX;
    new_description.from_log_index = new_start_log_index;
@ -387,7 +393,7 @@ ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_e
    return record;
 }

-void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool force_sync)
+void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry)
 {
    if (!current_writer)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records");
@ -398,14 +404,14 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool
    if (current_writer->getEntriesWritten() == rotate_interval)
        rotate(index);

-    auto offset = current_writer->appendRecord(buildRecord(index, log_entry), force_sync);
+    auto offset = current_writer->appendRecord(buildRecord(index, log_entry));
    if (!index_to_start_pos.try_emplace(index, offset).second)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index);

    logs[index] = makeClone(log_entry);
 }

-void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry, bool force_sync)
+void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
 {
    if (index_to_start_pos.count(index) == 0)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index);
@ -451,7 +457,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry, bool forc

    current_writer->setEntriesWritten(entries_written);

-    appendEntry(index, log_entry, force_sync);
+    appendEntry(index, log_entry);
 }

 void Changelog::compact(uint64_t up_to_log_index)
@ -540,7 +546,7 @@ nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index,
    return buf_out;
 }

-void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer, bool force_sync)
+void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer)
 {
    buffer.pos(0);
    int num_logs = buffer.get_int();
@ -555,23 +561,23 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer,

        LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local);
        if (i == 0 && logs.count(cur_index))
-            writeAt(cur_index, log_entry, force_sync);
+            writeAt(cur_index, log_entry);
        else
-            appendEntry(cur_index, log_entry, force_sync);
+            appendEntry(cur_index, log_entry);
    }
 }

 void Changelog::flush()
 {
-    current_writer->flush();
+    if (current_writer)
+        current_writer->flush(force_sync);
 }

 Changelog::~Changelog()
 {
    try
    {
-        if (current_writer)
-            current_writer->flush();
+        flush();
    }
    catch (...)
    {
--- a/src/Coordination/Changelog.h
+++ b/src/Coordination/Changelog.h
@ -63,17 +63,17 @@ class Changelog
 {

 public:
-    Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, Poco::Logger * log_);
+    Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, bool force_sync_, Poco::Logger * log_);

    /// Read changelog from files on changelogs_dir_ skipping all entries before from_log_index
    /// Truncate broken entries, remove files after broken entries.
    void readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep);

-    /// Add entry to log with index. Call fsync if force_sync true.
-    void appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool force_sync);
+    /// Add entry to log with index.
+    void appendEntry(uint64_t index, const LogEntryPtr & log_entry);

    /// Write entry at index and truncate all subsequent entries.
-    void writeAt(uint64_t index, const LogEntryPtr & log_entry, bool force_sync);
+    void writeAt(uint64_t index, const LogEntryPtr & log_entry);

    /// Remove log files with to_log_index <= up_to_log_index.
    void compact(uint64_t up_to_log_index);
@ -101,9 +101,9 @@ public:
    BufferPtr serializeEntriesToBuffer(uint64_t index, int32_t count);

    /// Apply entries from buffer overriding existing entries
-    void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer, bool force_sync);
+    void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer);

-    /// Fsync log to disk
+    /// Fsync latest log to disk and flush buffer
    void flush();

    uint64_t size() const
@ -124,6 +124,7 @@ private:
 private:
    const std::string changelogs_dir;
    const uint64_t rotate_interval;
+    const bool force_sync;
    Poco::Logger * log;

    std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@ -22,18 +22,19 @@ struct Settings;
    M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
    M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
    M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \
-    M(UInt64, reserved_log_items, 10000, "How many log items to store (don't remove during compaction)", 0) \
-    M(UInt64, snapshot_distance, 10000, "How many log items we have to collect to write new snapshot", 0) \
+    M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
+    M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
    M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
    M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \
    M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \
    M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
-    M(UInt64, rotate_log_storage_interval, 10000, "How many records will be stored in one log storage file", 0) \
+    M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
    M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
    M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
    M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
+    M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
    M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
-    M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)
+    M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0)

 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

--- a/src/Coordination/KeeperLogStore.cpp
+++ b/src/Coordination/KeeperLogStore.cpp
@ -5,9 +5,12 @@ namespace DB

 KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_)
    : log(&Poco::Logger::get("KeeperLogStore"))
-    , changelog(changelogs_path, rotate_interval_, log)
-    , force_sync(force_sync_)
+    , changelog(changelogs_path, rotate_interval_, force_sync_, log)
 {
+    if (force_sync_)
+        LOG_INFO(log, "force_sync enabled");
+    else
+        LOG_INFO(log, "force_sync disabled");
 }

 uint64_t KeeperLogStore::start_index() const
@ -38,7 +41,7 @@ uint64_t KeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
 {
    std::lock_guard lock(changelog_lock);
    uint64_t idx = changelog.getNextEntryIndex();
-    changelog.appendEntry(idx, entry, force_sync);
+    changelog.appendEntry(idx, entry);
    return idx;
 }

@ -46,7 +49,7 @@ uint64_t KeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
 void KeeperLogStore::write_at(uint64_t index, nuraft::ptr<nuraft::log_entry> & entry)
 {
    std::lock_guard lock(changelog_lock);
-    changelog.writeAt(index, entry, force_sync);
+    changelog.writeAt(index, entry);
 }

 nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> KeeperLogStore::log_entries(uint64_t start, uint64_t end)
@ -93,7 +96,7 @@ bool KeeperLogStore::flush()
 void KeeperLogStore::apply_pack(uint64_t index, nuraft::buffer & pack)
 {
    std::lock_guard lock(changelog_lock);
-    changelog.applyEntriesFromBuffer(index, pack, force_sync);
+    changelog.applyEntriesFromBuffer(index, pack);
 }

 uint64_t KeeperLogStore::size() const
@ -102,4 +105,10 @@ uint64_t KeeperLogStore::size() const
    return changelog.size();
 }

+/// Hook invoked at the end of an append batch (presumably by NuRaft's log store interface — confirm).
+/// Both arguments are ignored: the whole changelog is flushed once per batch under changelog_lock.
+void KeeperLogStore::end_of_append_batch(uint64_t /*start_index*/, uint64_t /*count*/)
+{
+    std::lock_guard lock(changelog_lock);
+    changelog.flush();
+}
+
 }
--- a/src/Coordination/KeeperLogStore.h
+++ b/src/Coordination/KeeperLogStore.h
@ -42,11 +42,12 @@ public:

    uint64_t size() const;

+    void end_of_append_batch(uint64_t start_index, uint64_t count) override;
+
 private:
    mutable std::mutex changelog_lock;
    Poco::Logger * log;
    Changelog changelog;
-    bool force_sync;
 };

 }
--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@ -24,6 +24,7 @@ namespace ErrorCodes
    extern const int RAFT_ERROR;
    extern const int NO_ELEMENTS_IN_CONFIG;
    extern const int SUPPORT_IS_DISABLED;
+    extern const int LOGICAL_ERROR;
 }

 namespace
@ -73,7 +74,6 @@ KeeperServer::KeeperServer(
                        config.getString("keeper_server.snapshot_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/snapshots"),
                        coordination_settings))
    , state_manager(nuraft::cs_new<KeeperStateManager>(server_id, "keeper_server", config, coordination_settings))
-    , responses_queue(responses_queue_)
    , log(&Poco::Logger::get("KeeperServer"))
 {
    if (coordination_settings->quorum_reads)
@ -111,7 +111,7 @@ void KeeperServer::startup()
    params.auto_forwarding_ = coordination_settings->auto_forwarding;
    params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;

-    params.return_method_ = nuraft::raft_params::blocking;
+    params.return_method_ = nuraft::raft_params::async_handler;

    nuraft::asio_service::options asio_opts{};
    if (state_manager->isSecure())
@ -222,75 +222,26 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord

 }

-void KeeperServer::putRequest(const KeeperStorage::RequestForSession & request_for_session)
+
+void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & request_for_session)
 {
-    auto [session_id, request] = request_for_session;
-    if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
-    {
-        state_machine->processReadRequest(request_for_session);
-    }
-    else
-    {
-        std::vector<nuraft::ptr<nuraft::buffer>> entries;
-        entries.push_back(getZooKeeperLogEntry(session_id, request));
+    if (!request_for_session.request->isReadRequest())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot process non-read request locally");

-        std::lock_guard lock(append_entries_mutex);
-
-        auto result = raft_instance->append_entries(entries);
-        if (!result->get_accepted())
-        {
-            KeeperStorage::ResponsesForSessions responses;
-            auto response = request->makeResponse();
-            response->xid = request->xid;
-            response->zxid = 0;
-            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
-        }
-
-        if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
-        {
-            KeeperStorage::ResponsesForSessions responses;
-            auto response = request->makeResponse();
-            response->xid = request->xid;
-            response->zxid = 0;
-            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
-        }
-        else if (result->get_result_code() != nuraft::cmd_result_code::OK)
-            throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
-    }
+    state_machine->processReadRequest(request_for_session);
 }

-int64_t KeeperServer::getSessionID(int64_t session_timeout_ms)
+RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
 {
-    /// Just some sanity check. We don't want to make a lot of clients wait with lock.
-    if (active_session_id_requests > 10)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Too many concurrent SessionID requests already in flight");

-    ++active_session_id_requests;
-    SCOPE_EXIT({ --active_session_id_requests; });
+    std::vector<nuraft::ptr<nuraft::buffer>> entries;
+    for (const auto & [session_id, request] : requests_for_sessions)
+        entries.push_back(getZooKeeperLogEntry(session_id, request));

-    auto entry = nuraft::buffer::alloc(sizeof(int64_t));
-    /// Just special session request
-    nuraft::buffer_serializer bs(entry);
-    bs.put_i64(session_timeout_ms);
-
-    std::lock_guard lock(append_entries_mutex);
-
-    auto result = raft_instance->append_entries({entry});
-
-    if (!result->get_accepted())
-        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
-
-    if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        throw Exception(ErrorCodes::RAFT_ERROR, "session_id request failed to RAFT");
-
-    auto resp = result->get();
-    if (resp == nullptr)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr as session_id");
-
-    nuraft::buffer_serializer bs_resp(resp);
-    return bs_resp.get_i64();
+    {
+        std::lock_guard lock(append_entries_mutex);
+        return raft_instance->append_entries(entries);
+    }
 }

 bool KeeperServer::isLeader() const
--- a/src/Coordination/KeeperServer.h
+++ b/src/Coordination/KeeperServer.h
@ -12,10 +12,12 @@
 namespace DB
 {

+using RaftAppendResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
+
 class KeeperServer
 {
 private:
-    int server_id;
+    const int server_id;

    CoordinationSettingsPtr coordination_settings;

@ -29,13 +31,10 @@ private:

    std::mutex append_entries_mutex;

-    ResponsesQueue & responses_queue;
-
    std::mutex initialized_mutex;
    std::atomic<bool> initialized_flag = false;
    std::condition_variable initialized_cv;
    std::atomic<bool> initial_batch_committed = false;
-    std::atomic<size_t> active_session_id_requests = 0;

    Poco::Logger * log;

@ -60,9 +59,9 @@ public:

    void startup();

-    void putRequest(const KeeperStorage::RequestForSession & request);
+    void putLocalReadRequest(const KeeperStorage::RequestForSession & request);

-    int64_t getSessionID(int64_t session_timeout_ms);
+    RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);

    std::unordered_set<int64_t> getDeadSessions();

@ -73,6 +72,8 @@ public:
    void waitInit();

    void shutdown();
+
+    int getServerID() const { return server_id; }
 };

 }
--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@ -90,25 +90,29 @@ void KeeperStateMachine::init()

 nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
 {
-    if (data.size() == sizeof(int64_t))
+    auto request_for_session = parseRequest(data);
+    if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
    {
-        nuraft::buffer_serializer timeout_data(data);
-        int64_t session_timeout_ms = timeout_data.get_i64();
-        auto response = nuraft::buffer::alloc(sizeof(int64_t));
+        const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast<const Coordination::ZooKeeperSessionIDRequest &>(*request_for_session.request);
        int64_t session_id;
-        nuraft::buffer_serializer bs(response);
        {
            std::lock_guard lock(storage_lock);
-            session_id = storage->getSessionID(session_timeout_ms);
-            bs.put_i64(session_id);
+            session_id = storage->getSessionID(session_id_request.session_timeout_ms);
        }
-        LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
-        last_committed_idx = log_idx;
-        return response;
+        LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
+
+        std::shared_ptr<Coordination::ZooKeeperSessionIDResponse> response = std::make_shared<Coordination::ZooKeeperSessionIDResponse>();
+        response->internal_id = session_id_request.internal_id;
+        response->session_id = session_id;
+        response->server_id = session_id_request.server_id;
+
+        KeeperStorage::ResponseForSession response_for_session;
+        response_for_session.session_id = -1;
+        response_for_session.response = response;
+        responses_queue.push(response_for_session);
    }
    else
    {
-        auto request_for_session = parseRequest(data);
        KeeperStorage::ResponsesForSessions responses_for_sessions;
        {
            std::lock_guard lock(storage_lock);
@ -116,10 +120,10 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
            for (auto & response_for_session : responses_for_sessions)
                responses_queue.push(response_for_session);
        }
-
-        last_committed_idx = log_idx;
-        return nullptr;
    }
+
+    last_committed_idx = log_idx;
+    return nullptr;
 }

 bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
--- a/src/Coordination/KeeperStorage.cpp
+++ b/src/Coordination/KeeperStorage.cpp
@ -405,8 +405,6 @@ struct KeeperStorageListRequest final : public KeeperStorageRequest

            response.names.insert(response.names.end(), it->value.children.begin(), it->value.children.end());

-            std::sort(response.names.begin(), response.names.end());
-
            response.stat = it->value.stat;
            response.error = Coordination::Error::ZOK;
        }
--- a/src/Coordination/KeeperStorageDispatcher.cpp
+++ b/src/Coordination/KeeperStorageDispatcher.cpp
@ -1,5 +1,9 @@
 #include <Coordination/KeeperStorageDispatcher.h>
 #include <Common/setThreadName.h>
+#include <Common/Stopwatch.h>
+#include <Common/ZooKeeper/KeeperException.h>
+#include <future>
+#include <chrono>

 namespace DB
 {
@ -17,29 +21,116 @@ KeeperStorageDispatcher::KeeperStorageDispatcher()
 {
 }

+
 void KeeperStorageDispatcher::requestThread()
 {
    setThreadName("KeeperReqT");
+
+    /// Result of requests batch from previous iteration
+    RaftAppendResult prev_result = nullptr;
+    /// Requests from previous iteration. We store them to be able
+    /// to send errors to the client.
+    KeeperStorage::RequestsForSessions prev_batch;
+
    while (!shutdown_called)
    {
        KeeperStorage::RequestForSession request;

        UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());
+        uint64_t max_batch_size = coordination_settings->max_requests_batch_size;

-        if (requests_queue.tryPop(request, max_wait))
+        /// The code below does a very simple thing: it batches all write (quorum) requests into a vector while
+        /// the previous write batch is not finished and the max batch size is not reached. The main complexity comes from
+        /// the ability to process read requests without quorum (from local state). So when we are collecting
+        /// requests into a batch we must check that the new request is not a read request. Otherwise we have to
+        /// process all already accumulated write requests, wait for them synchronously and only after that process
+        /// the read request. So reads are a kind of "separator" for writes.
+        try
        {
-            if (shutdown_called)
-                break;
+            if (requests_queue->tryPop(request, max_wait))
+            {
+                if (shutdown_called)
+                    break;

-            try
-            {
-                server->putRequest(request);
-            }
-            catch (...)
-            {
-                tryLogCurrentException(__PRETTY_FUNCTION__);
+                KeeperStorage::RequestsForSessions current_batch;
+
+                bool has_read_request = false;
+
+                /// If the new request is not a read request, or reads must go through quorum, put it into the batch.
+                /// Otherwise we will process it locally.
+                if (coordination_settings->quorum_reads || !request.request->isReadRequest())
+                {
+                    current_batch.emplace_back(request);
+
+                    /// Keep collecting requests until the previous append finishes or the batch is big enough.
+                    /// has_result == false && get_result_code == OK means that the previous batch is still not processed.
+                    /// Sometimes NuRaft sets an error code without setting the result, so we check both here.
+                    while (prev_result && (!prev_result->has_result() && prev_result->get_result_code() == nuraft::cmd_result_code::OK) && current_batch.size() <= max_batch_size)
+                    {
+                        /// Trying to get batch requests as fast as possible
+                        if (requests_queue->tryPop(request, 1))
+                        {
+                            /// Don't append read request into batch, we have to process them separately
+                            if (!coordination_settings->quorum_reads && request.request->isReadRequest())
+                            {
+                                has_read_request = true;
+                                break;
+                            }
+                            else
+                            {
+
+                                current_batch.emplace_back(request);
+                            }
+                        }
+
+                        if (shutdown_called)
+                            break;
+                    }
+                }
+                else
+                    has_read_request = true;
+
+                if (shutdown_called)
+                    break;
+
+                /// Forcefully process all previous pending requests
+                if (prev_result)
+                    forceWaitAndProcessResult(prev_result, prev_batch);
+
+                /// Process collected write requests batch
+                if (!current_batch.empty())
+                {
+                    auto result = server->putRequestBatch(current_batch);
+
+                    if (result)
+                    {
+                        if (has_read_request) /// If we will execute read request next, than we have to process result now
+                            forceWaitAndProcessResult(result, current_batch);
+                    }
+                    else
+                    {
+                        addErrorResponses(current_batch, Coordination::Error::ZRUNTIMEINCONSISTENCY);
+                        current_batch.clear();
+                    }
+
+                    prev_batch = current_batch;
+                    prev_result = result;
+                }
+
+                /// Read request always goes after write batch (last request)
+                if (has_read_request)
+                {
+                    if (server->isLeaderAlive())
+                        server->putLocalReadRequest(request);
+                    else
+                        addErrorResponses({request}, Coordination::Error::ZRUNTIMEINCONSISTENCY);
+                }
            }
        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
    }
 }

@ -94,14 +185,32 @@ void KeeperStorageDispatcher::snapshotThread()
 void KeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
 {
    std::lock_guard lock(session_to_response_callback_mutex);
-    auto session_writer = session_to_response_callback.find(session_id);
-    if (session_writer == session_to_response_callback.end())
-        return;
+    if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::SessionID)
+    {
+        const Coordination::ZooKeeperSessionIDResponse & session_id_resp = dynamic_cast<const Coordination::ZooKeeperSessionIDResponse &>(*response);

-    session_writer->second(response);
-    /// Session closed, no more writes
-    if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
-        session_to_response_callback.erase(session_writer);
+        /// Nobody waits for this session id
+        if (session_id_resp.server_id != server->getServerID() || !new_session_id_response_callback.count(session_id_resp.internal_id))
+            return;
+
+        auto callback = new_session_id_response_callback[session_id_resp.internal_id];
+        callback(response);
+        new_session_id_response_callback.erase(session_id_resp.internal_id);
+    }
+    else
+    {
+        auto session_writer = session_to_response_callback.find(session_id);
+        if (session_writer == session_to_response_callback.end())
+            return;
+
+        session_writer->second(response);
+
+        /// Session closed, no more writes
+        if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
+        {
+            session_to_response_callback.erase(session_writer);
+        }
+    }
 }

 bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
@ -119,8 +228,8 @@ bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr
    std::lock_guard lock(push_request_mutex);
    /// Put close requests without timeouts
    if (request->getOpNum() == Coordination::OpNum::Close)
-        requests_queue.push(std::move(request_info));
-    else if (!requests_queue.tryPush(std::move(request_info), coordination_settings->operation_timeout_ms.totalMilliseconds()))
+        requests_queue->push(std::move(request_info));
+    else if (!requests_queue->tryPush(std::move(request_info), coordination_settings->operation_timeout_ms.totalMilliseconds()))
        throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
    return true;
 }
@ -131,6 +240,7 @@ void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration
    int myid = config.getInt("keeper_server.server_id");

    coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
+    requests_queue = std::make_unique<RequestsQueue>(coordination_settings->max_requests_batch_size);

    request_thread = ThreadFromGlobalPool([this] { requestThread(); });
    responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
@ -175,7 +285,7 @@ void KeeperStorageDispatcher::shutdown()
                session_cleaner_thread.join();

            /// FIXME not the best way to notify
-            requests_queue.push({});
+            requests_queue->push({});
            if (request_thread.joinable())
                request_thread.join();

@ -192,7 +302,7 @@ void KeeperStorageDispatcher::shutdown()
            server->shutdown();

        KeeperStorage::RequestForSession request_for_session;
-        while (requests_queue.tryPop(request_for_session))
+        while (requests_queue->tryPop(request_for_session))
        {
            if (request_for_session.request)
            {
@ -249,7 +359,7 @@ void KeeperStorageDispatcher::sessionCleanerTask()
                    request_info.session_id = dead_session;
                    {
                        std::lock_guard lock(push_request_mutex);
-                        requests_queue.push(std::move(request_info));
+                        requests_queue->push(std::move(request_info));
                    }
                    finishSession(dead_session);
                    LOG_INFO(log, "Dead session close request pushed");
@ -273,4 +383,79 @@ void KeeperStorageDispatcher::finishSession(int64_t session_id)
        session_to_response_callback.erase(session_it);
 }

+/// Pushes one error response into responses_queue for every request in requests_for_sessions.
+/// Each response is created by the request itself (makeResponse), keeps the request's xid,
+/// gets zxid = 0 and carries the given error. The requests are not modified.
+void KeeperStorageDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error)
+{
+    for (const auto & [session_id, request] : requests_for_sessions)
+    {
+        auto response = request->makeResponse();
+        response->xid = request->xid;
+        response->zxid = 0;
+        response->error = error;
+        responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
+    }
+}
+
+/// Makes sure the given append result is available and turns a failed append into error
+/// responses for every request of the batch. In all cases `result` is reset to nullptr and
+/// `requests_for_sessions` is cleared before returning.
+void KeeperStorageDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
+{
+    /// The value returned by get() is not used; it is called only to wait for the result
+    /// (presumably it blocks until the result is ready — confirm against NuRaft).
+    if (!result->has_result())
+        result->get();
+
+    /// If we get some errors, then send them to clients:
+    /// a not accepted or timed out append becomes ZOPERATIONTIMEOUT, any other non-OK code becomes ZRUNTIMEINCONSISTENCY.
+    if (!result->get_accepted() || result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
+        addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
+    else if (result->get_result_code() != nuraft::cmd_result_code::OK)
+        addErrorResponses(requests_for_sessions, Coordination::Error::ZRUNTIMEINCONSISTENCY);
+
+    result = nullptr;
+    requests_for_sessions.clear();
+}
+
+/// Requests a new session id through the regular requests queue and waits for the answer.
+///
+/// A ZooKeeperSessionIDRequest tagged with a fresh internal id and this server's id is pushed
+/// to requests_queue with session_id = -1. The matching response is delivered through a callback
+/// registered in new_session_id_response_callback, which fulfils a promise this function waits on.
+///
+/// Throws TIMEOUT_EXCEEDED if the request cannot be queued or the response does not arrive
+/// within session_timeout_ms; rethrows the exception stored by the callback if the response is invalid.
+int64_t KeeperStorageDispatcher::getSessionID(int64_t session_timeout_ms)
+{
+    KeeperStorage::RequestForSession request_info;
+    std::shared_ptr<Coordination::ZooKeeperSessionIDRequest> request = std::make_shared<Coordination::ZooKeeperSessionIDRequest>();
+    request->internal_id = internal_session_id_counter.fetch_add(1);
+    request->session_timeout_ms = session_timeout_ms;
+    request->server_id = server->getServerID();
+
+    request_info.request = request;
+    request_info.session_id = -1;
+
+    const auto internal_id = request->internal_id;
+
+    auto promise = std::make_shared<std::promise<int64_t>>();
+    auto future = promise->get_future();
+    {
+        std::lock_guard lock(session_to_response_callback_mutex);
+        new_session_id_response_callback[internal_id] = [promise, internal_id] (const Coordination::ZooKeeperResponsePtr & response)
+        {
+            /// A promise can be satisfied only once, so every failure path must return immediately:
+            /// a second set_exception/set_value would throw std::future_error out of the callback.
+            if (response->getOpNum() != Coordination::OpNum::SessionID)
+            {
+                promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Incorrect response of type {} instead of SessionID response", Coordination::toString(response->getOpNum()))));
+                return;
+            }
+
+            /// The type was checked above; bind by reference to avoid copying the response.
+            const auto & session_id_response = dynamic_cast<const Coordination::ZooKeeperSessionIDResponse &>(*response);
+            if (session_id_response.internal_id != internal_id)
+            {
+                promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Incorrect response with internal id {} instead of {}", session_id_response.internal_id, internal_id)));
+                return;
+            }
+
+            if (response->error != Coordination::Error::ZOK)
+            {
+                promise->set_exception(std::make_exception_ptr(zkutil::KeeperException("SessionID request failed with error", response->error)));
+                return;
+            }
+
+            promise->set_value(session_id_response.session_id);
+        };
+    }
+
+    /// Nobody will wait for the response after a failure, so the callback must not stay in the map forever.
+    auto forget_callback = [this, internal_id]
+    {
+        std::lock_guard lock(session_to_response_callback_mutex);
+        new_session_id_response_callback.erase(internal_id);
+    };
+
+    bool pushed = false;
+    {
+        std::lock_guard lock(push_request_mutex);
+        pushed = requests_queue->tryPush(std::move(request_info), session_timeout_ms);
+    }
+
+    /// Clean up outside of push_request_mutex to avoid holding two mutexes at once.
+    if (!pushed)
+    {
+        forget_callback();
+        throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
+    }
+
+    if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready)
+    {
+        forget_callback();
+        throw Exception("Cannot receive session id within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
+    }
+
+    return future.get();
+}
+
 }
--- a/src/Coordination/KeeperStorageDispatcher.h
+++ b/src/Coordination/KeeperStorageDispatcher.h
@ -32,24 +32,42 @@ private:
    using RequestsQueue = ConcurrentBoundedQueue<KeeperStorage::RequestForSession>;
    using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;

-    RequestsQueue requests_queue{1};
+    /// Size depends on coordination settings
+    std::unique_ptr<RequestsQueue> requests_queue;
    ResponsesQueue responses_queue;
    SnapshotsQueue snapshots_queue{1};

    std::atomic<bool> shutdown_called{false};

    std::mutex session_to_response_callback_mutex;
+    /// These two maps look similar, but serve different purposes.
+    /// The first map is a subscription map for normal responses
+    /// (get, set, list, etc.). Dispatcher determines the callback for each response
+    /// using the session id from this map.
    SessionToResponseCallback session_to_response_callback;
+    /// But when a client connects to the server for the first time it doesn't
+    /// have a session_id. It requests one from the server. We give a temporary
+    /// internal id to such requests just to match the client with its response.
+    SessionToResponseCallback new_session_id_response_callback;

+    /// Reading and batching new requests from client handlers
    ThreadFromGlobalPool request_thread;
+    /// Pushing responses to client handlers
+    /// using session_id.
    ThreadFromGlobalPool responses_thread;
+    /// Cleaning old dead sessions
    ThreadFromGlobalPool session_cleaner_thread;
+    /// Dumping new snapshots to disk
    ThreadFromGlobalPool snapshot_thread;

+    /// RAFT wrapper. Most important class.
    std::unique_ptr<KeeperServer> server;

    Poco::Logger * log;

+    /// Counter for new session_id requests.
+    std::atomic<int64_t> internal_session_id_counter{0};
+
 private:
    void requestThread();
    void responseThread();
@ -57,6 +75,14 @@ private:
    void snapshotThread();
    void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);

+    /// Add error responses for requests to responses queue.
+    /// Does not modify the requests (they are taken by const reference).
+    void addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error);
+
+    /// Forcefully waits for the result and sets errors if something went wrong.
+    /// Clears both arguments.
+    void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions);
+
 public:
    KeeperStorageDispatcher();

@ -78,10 +104,7 @@ public:
        return server->isLeaderAlive();
    }

-    int64_t getSessionID(long session_timeout_ms)
-    {
-        return server->getSessionID(session_timeout_ms);
-    }
+    int64_t getSessionID(int64_t session_timeout_ms);

    void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
    /// Call if we don't need any responses for this session no more (session was expired)
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@ -211,6 +211,8 @@ TEST(CoordinationTest, ChangelogTestSimple)
    changelog.init(1, 0);
    auto entry = getLogEntry("hello world", 77);
    changelog.append(entry);
+    changelog.end_of_append_batch(0, 0);
+
    EXPECT_EQ(changelog.next_slot(), 2);
    EXPECT_EQ(changelog.start_index(), 1);
    EXPECT_EQ(changelog.last_entry()->get_term(), 77);
@ -225,6 +227,7 @@ TEST(CoordinationTest, ChangelogTestFile)
    changelog.init(1, 0);
    auto entry = getLogEntry("hello world", 77);
    changelog.append(entry);
+    changelog.end_of_append_batch(0, 0);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    for (const auto & p : fs::directory_iterator("./logs"))
        EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin");
@ -234,6 +237,7 @@ TEST(CoordinationTest, ChangelogTestFile)
    changelog.append(entry);
    changelog.append(entry);
    changelog.append(entry);
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -249,6 +253,8 @@ TEST(CoordinationTest, ChangelogReadWrite)
        auto entry = getLogEntry("hello world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);
+
    EXPECT_EQ(changelog.size(), 10);
    DB::KeeperLogStore changelog_reader("./logs", 1000, true);
    changelog_reader.init(1, 0);
@ -276,10 +282,14 @@ TEST(CoordinationTest, ChangelogWriteAt)
        auto entry = getLogEntry("hello world", i * 10);
        changelog.append(entry);
    }
+
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 10);

    auto entry = getLogEntry("writer", 77);
    changelog.write_at(7, entry);
+    changelog.end_of_append_batch(0, 0);
+
    EXPECT_EQ(changelog.size(), 7);
    EXPECT_EQ(changelog.last_entry()->get_term(), 77);
    EXPECT_EQ(changelog.entry_at(7)->get_term(), 77);
@ -305,6 +315,7 @@ TEST(CoordinationTest, ChangelogTestAppendAfterRead)
        auto entry = getLogEntry("hello world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog.size(), 7);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
@ -319,6 +330,7 @@ TEST(CoordinationTest, ChangelogTestAppendAfterRead)
        auto entry = getLogEntry("hello world", i * 10);
        changelog_reader.append(entry);
    }
+    changelog_reader.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog_reader.size(), 10);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -331,6 +343,7 @@ TEST(CoordinationTest, ChangelogTestAppendAfterRead)

    auto entry = getLogEntry("someentry", 77);
    changelog_reader.append(entry);
+    changelog_reader.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog_reader.size(), 11);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -354,6 +367,7 @@ TEST(CoordinationTest, ChangelogTestCompaction)
        auto entry = getLogEntry("hello world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog.size(), 3);

@ -373,6 +387,7 @@ TEST(CoordinationTest, ChangelogTestCompaction)
    changelog.append(e3);
    auto e4 = getLogEntry("hello world", 60);
    changelog.append(e4);
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -405,6 +420,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperations)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog.size(), 10);

@ -420,6 +436,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperations)
    EXPECT_EQ(apply_changelog.size(), 10);

    apply_changelog.apply_pack(8, *entries);
+    apply_changelog.end_of_append_batch(0, 0);

    EXPECT_EQ(apply_changelog.size(), 12);
    EXPECT_EQ(apply_changelog.start_index(), 1);
@ -447,6 +464,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog.size(), 10);

@ -458,6 +476,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
    EXPECT_EQ(changelog_new.size(), 0);

    changelog_new.apply_pack(5, *entries);
+    changelog_new.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog_new.size(), 5);
    EXPECT_EQ(changelog_new.start_index(), 5);
@ -468,6 +487,8 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)

    auto e = getLogEntry("hello_world", 110);
    changelog_new.append(e);
+    changelog_new.end_of_append_batch(0, 0);
+
    EXPECT_EQ(changelog_new.size(), 6);
    EXPECT_EQ(changelog_new.start_index(), 5);
    EXPECT_EQ(changelog_new.next_slot(), 11);
@ -488,6 +509,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -501,6 +523,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)

    auto e1 = getLogEntry("helloworld", 5555);
    changelog.write_at(7, e1);
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 7);
    EXPECT_EQ(changelog.start_index(), 1);
    EXPECT_EQ(changelog.next_slot(), 8);
@ -534,6 +557,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -547,6 +571,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)

    auto e1 = getLogEntry("helloworld", 5555);
    changelog.write_at(11, e1);
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 11);
    EXPECT_EQ(changelog.start_index(), 1);
    EXPECT_EQ(changelog.next_slot(), 12);
@ -580,6 +605,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtAllFiles)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -593,6 +619,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtAllFiles)

    auto e1 = getLogEntry("helloworld", 5555);
    changelog.write_at(1, e1);
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 1);
    EXPECT_EQ(changelog.start_index(), 1);
    EXPECT_EQ(changelog.next_slot(), 2);
@ -619,6 +646,7 @@ TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 35);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -635,6 +663,7 @@ TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead)

    auto entry = getLogEntry("36_hello_world", 360);
    changelog_reader.append(entry);
+    changelog_reader.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog_reader.size(), 36);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
@ -660,6 +689,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog.size(), 35);
    EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
@ -674,6 +704,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)

    DB::KeeperLogStore changelog_reader("./logs", 5, true);
    changelog_reader.init(1, 0);
+    changelog_reader.end_of_append_batch(0, 0);

    EXPECT_EQ(changelog_reader.size(), 10);
    EXPECT_EQ(changelog_reader.last_entry()->get_term(), 90);
@ -689,6 +720,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)

    auto entry = getLogEntry("h", 7777);
    changelog_reader.append(entry);
+    changelog_reader.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog_reader.size(), 11);
    EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);

@ -719,6 +751,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
@ -735,6 +768,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
    EXPECT_FALSE(fs::exists("./logs/changelog_21_40.bin"));
    auto entry = getLogEntry("hello_world", 7777);
    changelog_reader.append(entry);
+    changelog_reader.end_of_append_batch(0, 0);
    EXPECT_EQ(changelog_reader.size(), 3);
    EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);

@ -757,6 +791,7 @@ TEST(CoordinationTest, ChangelogTestLostFiles)
        auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
        changelog.append(entry);
    }
+    changelog.end_of_append_batch(0, 0);

    EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
@ -1105,6 +1140,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
        request->path = "/hello_" + std::to_string(i);
        auto entry = getLogEntryFromZKRequest(0, 1, request);
        changelog.append(entry);
+        changelog.end_of_append_batch(0, 0);

        state_machine->commit(i, changelog.entry_at(i)->get_buf());
        bool snapshot_created = false;
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@ -484,7 +484,7 @@ DataTypes Block::getDataTypes() const


 template <typename ReturnType>
-static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
+static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_remove_constants)
 {
    auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
    {
@ -515,7 +515,16 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
        if (!actual.column || !expected.column)
            continue;

-        if (actual.column->getName() != expected.column->getName())
+        const IColumn * actual_column = actual.column.get();
+
+        /// If removing constants is allowed and the expected column is not const, unwrap the actual constant column.
+        if (allow_remove_constants && !isColumnConst(*expected.column))
+        {
+            if (const auto * column_const = typeid_cast<const ColumnConst *>(actual_column))
+                actual_column = &column_const->getDataColumn();
+        }
+
+        if (actual_column->getName() != expected.column->getName())
            return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n"
                + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);

@ -537,13 +546,25 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons

 bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs)
 {
-    return checkBlockStructure<bool>(lhs, rhs, {});
+    return checkBlockStructure<bool>(lhs, rhs, {}, false);
 }


 void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
 {
-    checkBlockStructure<void>(lhs, rhs, context_description);
+    checkBlockStructure<void>(lhs, rhs, context_description, false);
+}
+
+
+bool isCompatibleHeader(const Block & actual, const Block & desired)
+{
+    return checkBlockStructure<bool>(actual, desired, {}, true);
+}
+
+
+void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description)
+{
+    checkBlockStructure<void>(actual, desired, context_description, true);
 }


--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@ -184,6 +184,12 @@ bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
 /// Throw exception when blocks are different.
 void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description);

+/// The actual header is compatible with the desired one if the blocks have equal structure except for constants.
+/// A column may be constant in the actual header while being non-constant in the desired one.
+/// If both columns are constant, it is checked that they have the same value.
+bool isCompatibleHeader(const Block & actual, const Block & desired);
+void assertCompatibleHeader(const Block & actual, const Block & desired, const std::string & context_description);
+
 /// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns.
 void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);

--- a/src/Core/DecimalFunctions.h
+++ b/src/Core/DecimalFunctions.h
@ -103,7 +103,10 @@ inline DecimalType decimalFromComponentsWithMultiplier(
    if (common::mulOverflow(whole, scale_multiplier, whole_scaled))
        throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);

-    const T value = whole_scaled + fractional_sign * (fractional % scale_multiplier);
+    T value;
+    if (common::addOverflow(whole_scaled, fractional_sign * (fractional % scale_multiplier), value))
+        throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
+
    return DecimalType(value);
 }

--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@ -96,7 +96,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
 template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
 template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -159,7 +159,7 @@ private:
    T dec;
    UInt32 scale;
 };
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

@ -563,7 +563,7 @@ public:
        {
            case Types::Null:    return f(field.template get<Null>());
 // gcc 8.2.1
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -583,7 +583,7 @@ public:
            case Types::Int128: return f(field.template get<Int128>());
            case Types::UInt256: return f(field.template get<UInt256>());
            case Types::Int256: return f(field.template get<Int256>());
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
        }
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -70,6 +70,7 @@ class IColumn;
    M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
    M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
    M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
+    M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
    M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
    M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
    M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
@ -142,7 +143,7 @@ class IColumn;
    M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
    \
    M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
-    M(UInt64, min_bytes_to_use_mmap_io, (64 * 1024 * 1024), "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
+    M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
    M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
    \
    M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
@ -224,6 +225,7 @@ class IColumn;
    /** Settings for testing hedged requests */ \
    M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
    M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
+    M(UInt64, unknown_packet_in_send_data, 0, "Send unknown packet instead of data Nth data packet", 0) \
    \
    M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
    M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
@ -424,6 +426,7 @@ class IColumn;
    M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
    M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
    M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
+    M(Bool, optimize_fuse_sum_count_avg, false, "Fuse aggregate functions sum(), avg(), count() with identical arguments into one sumCount() call, if the query has at least two different functions", 0) \
    M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
    M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
    M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
@ -445,6 +448,8 @@ class IColumn;
    M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
    M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
    M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
+    M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
+    M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0)  \
    \
    /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
    \
--- a/src/Core/Types.h
+++ b/src/Core/Types.h
@ -15,7 +15,7 @@ namespace DB
 struct Null {};

 /// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
 #endif
@ -59,7 +59,7 @@ enum class TypeIndex
    LowCardinality,
    Map,
 };
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/DataTypes/DataTypeDateTime.h
+++ b/src/DataTypes/DataTypeDateTime.h
@ -19,9 +19,12 @@ public:
    TimezoneMixin(const TimezoneMixin &) = default;

    const DateLUTImpl & getTimeZone() const { return time_zone; }
+    bool hasExplicitTimeZone() const { return has_explicit_time_zone; }

 protected:
+    /// true if time zone name was provided in data type parameters, false if it's using default time zone.
    bool has_explicit_time_zone;
+
    const DateLUTImpl & time_zone;
    const DateLUTImpl & utc_time_zone;
 };
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@ -365,8 +365,8 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid)
    /// 4. INSERT INTO table ...; (both Storage instances writes data without any synchronization)
    /// To avoid it, we remember UUIDs of detached tables and does not allow ATTACH table with such UUID until detached instance still in use.
    if (detached_tables.count(uuid))
-        throw Exception("Cannot attach table with UUID " + toString(uuid) +
-              ", because it was detached but still used by some query. Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS);
+        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Cannot attach table with UUID {}, "
+                        "because it was detached but still used by some query. Retry later.", toString(uuid));
 }

 void DatabaseAtomic::setDetachedTableNotInUseForce(const UUID & uuid)
@ -573,12 +573,6 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name
 }
 void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
 {
-    {
-        std::lock_guard lock{mutex};
-        if (detached_tables.count(uuid) == 0)
-            return;
-    }
-
    /// Table is in use while its shared_ptr counter is greater than 1.
    /// We cannot trigger condvar on shared_ptr destruction, so it's busy wait.
    while (true)
@ -594,5 +588,13 @@ void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
    }
 }

+void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid)
+{
+    DetachedTables not_in_use;
+    std::lock_guard lock{mutex};
+    not_in_use = cleanupDetachedTables();
+    assertDetachedTableNotInUse(uuid);
+}
+
 }

--- a/Show More
+++ b/Show More