Merge branch 'master' into fix_prewhere_in_materialize_mysql

2024-11-10 01:25:21 +00:00 · 2021-04-20 15:38:48 +03:00 · 2021-04-20 15:38:48 +03:00 · bef0ab182e
commit bef0ab182e
parent d9ef1a9368 a25e6d96a5
295 changed files with 7207 additions and 1391 deletions
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@ -25,7 +25,7 @@


 #if defined(__PPC__)
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
 #endif
@ -1266,7 +1266,7 @@ public:
 };

 #if defined(__PPC__)
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
 #endif
--- a/base/common/wide_integer_impl.h
+++ b/base/common/wide_integer_impl.h
@ -271,9 +271,13 @@ struct integer<Bits, Signed>::_impl
        /// As to_Integral does a static_cast to int64_t, it may result in UB.
        /// The necessary check here is that long double has enough significant (mantissa) bits to store the
        /// int64_t max value precisely.
+
+        //TODO Be compatible with Apple aarch64
+#if not (defined(__APPLE__) && defined(__aarch64__))
        static_assert(LDBL_MANT_DIG >= 64,
            "On your system long double has less than 64 precision bits,"
            "which may result in UB when initializing double from int64_t");
+#endif

        if ((rhs > 0 && rhs < static_cast<long double>(max_int)) || (rhs < 0 && rhs > static_cast<long double>(min_int)))
        {
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@ -1,7 +1,7 @@
 if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
    set (ARCH_AMD64 1)
 endif ()
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
    set (ARCH_AARCH64 1)
 endif ()
 if (ARCH_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54450)
+SET(VERSION_REVISION 54451)
 SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 5)
+SET(VERSION_MINOR 6)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
-SET(VERSION_DESCRIBE v21.5.1.1-prestable)
-SET(VERSION_STRING 21.5.1.1)
+SET(VERSION_GITHASH 96fced4c3cf432fb0b401d2ab01f0c56e5f74a96)
+SET(VERSION_DESCRIBE v21.6.1.1-prestable)
+SET(VERSION_STRING 21.6.1.1)
 # end of autochange
--- a/cmake/darwin/default_libs.cmake
+++ b/cmake/darwin/default_libs.cmake
@ -4,6 +4,9 @@ set (DEFAULT_LIBS "${DEFAULT_LIBS} ${COVERAGE_OPTION} -lc -lm -lpthread -ldl")

 if (COMPILER_GCC)
    set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc_eh")
+    if (ARCH_AARCH64)
+        set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc")
+    endif ()
 endif ()

 message(STATUS "Default libraries: ${DEFAULT_LIBS}")
--- a/cmake/darwin/toolchain-aarch64.cmake
+++ b/cmake/darwin/toolchain-aarch64.cmake
@ -0,0 +1,14 @@
+set (CMAKE_SYSTEM_NAME "Darwin")
+set (CMAKE_SYSTEM_PROCESSOR "aarch64")
+set (CMAKE_C_COMPILER_TARGET "aarch64-apple-darwin")
+set (CMAKE_CXX_COMPILER_TARGET "aarch64-apple-darwin")
+set (CMAKE_ASM_COMPILER_TARGET "aarch64-apple-darwin")
+set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-aarch64")
+
+set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)  # disable linkage check - it doesn't work in CMake
+
+set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
+set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
+
+set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
+set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
--- a/cmake/find/ldap.cmake
+++ b/cmake/find/ldap.cmake
@ -64,7 +64,8 @@ if (NOT OPENLDAP_FOUND AND NOT MISSING_INTERNAL_LDAP_LIBRARY)
        ( "${_system_name}" STREQUAL "linux"   AND "${_system_processor}" STREQUAL "aarch64" ) OR
        ( "${_system_name}" STREQUAL "linux"   AND "${_system_processor}" STREQUAL "ppc64le" ) OR
        ( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "x86_64"  ) OR
-        ( "${_system_name}" STREQUAL "darwin"  AND "${_system_processor}" STREQUAL "x86_64"  )
+        ( "${_system_name}" STREQUAL "darwin"  AND "${_system_processor}" STREQUAL "x86_64"  ) OR
+        ( "${_system_name}" STREQUAL "darwin"  AND "${_system_processor}" STREQUAL "aarch64"   )
    )
        set (_ldap_supported_platform TRUE)
    endif ()
--- a/cmake/find/rocksdb.cmake
+++ b/cmake/find/rocksdb.cmake
@ -1,3 +1,7 @@
+if (OS_DARWIN AND ARCH_AARCH64)
+    set (ENABLE_ROCKSDB OFF CACHE INTERNAL "")
+endif()
+
 option(ENABLE_ROCKSDB "Enable ROCKSDB" ${ENABLE_LIBRARIES})

 if (NOT ENABLE_ROCKSDB)
--- a/cmake/warnings.cmake
+++ b/cmake/warnings.cmake
@ -171,6 +171,7 @@ elseif (COMPILER_GCC)
    add_cxx_compile_options(-Wtrampolines)
    # Obvious
    add_cxx_compile_options(-Wunused)
+    add_cxx_compile_options(-Wundef)
    # Warn if vector operation is not implemented via SIMD capabilities of the architecture
    add_cxx_compile_options(-Wvector-operation-performance)
    # XXX: libstdc++ has some of these for 3way compare
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit d2feb5978b979729a07c3ca76eaa4ab94cef4ceb
+Subproject commit 377f8e77491d9f66ce8e32e88aae19dffe8dc4d7
--- a/contrib/boost
+++ b/contrib/boost
@ -1 +1 @@
-Subproject commit ee24fa55bc46e4d2ce7d0d052cc5a0d9b1be8c36
+Subproject commit a8d43d3142cc6b26fc55bec33f7f6edb1156ab7a
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@ -1,10 +1,13 @@
-if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE) OR NOT (OS_LINUX OR OS_FREEBSD OR OS_DARWIN))
+if (SANITIZE OR NOT (
+    ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE)) OR
+    (OS_DARWIN AND CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+))
    if (ENABLE_JEMALLOC)
        message (${RECONFIGURE_MESSAGE_LEVEL}
-                 "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64 or ppc64le on linux or freebsd.")
-    endif()
+                 "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
+    endif ()
    set (ENABLE_JEMALLOC OFF)
-else()
+else ()
    option (ENABLE_JEMALLOC "Enable jemalloc allocator" ${ENABLE_LIBRARIES})
 endif ()

@ -34,9 +37,9 @@ if (OS_LINUX)
    # avoid spurious latencies and additional work associated with
    # MADV_DONTNEED. See
    # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
-    set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000")
+    set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
 else()
-    set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000")
+    set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
 endif()
 # CACHE variable is empty, to allow changing defaults without necessity
 # to purge cache
--- a/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in
@ -42,7 +42,7 @@
 * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
 * bits are the same as bit 47.
 */
-#define LG_VADDR 48
+#define LG_VADDR 64

 /* Defined if C11 atomics are available. */
 #define JEMALLOC_C11_ATOMICS 1
@ -101,11 +101,6 @@
 */
 #define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1

-/*
- * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
- */
-#define JEMALLOC_HAVE_CLOCK_REALTIME 1
-
 /*
 * Defined if _malloc_thread_cleanup() exists.  At least in the case of
 * FreeBSD, pthread_key_create() allocates, which if used during malloc
@ -181,14 +176,14 @@
 /* #undef LG_QUANTUM */

 /* One page is 2^LG_PAGE bytes. */
-#define LG_PAGE 16
+#define LG_PAGE 14

 /*
 * One huge page is 2^LG_HUGEPAGE bytes.  Note that this is defined even if the
 * system does not explicitly support huge pages; system calls that require
 * explicit huge page support are separately configured.
 */
-#define LG_HUGEPAGE 29
+#define LG_HUGEPAGE 21

 /*
 * If defined, adjacent virtual memory mappings with identical attributes
@ -356,7 +351,7 @@
 /* #undef JEMALLOC_EXPORT */

 /* config.malloc_conf options string. */
-#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
+#define JEMALLOC_CONFIG_MALLOC_CONF ""

 /* If defined, jemalloc takes the malloc/free/etc. symbol names. */
 /* #undef JEMALLOC_IS_MALLOC */
--- a/contrib/librdkafka-cmake/config.h.in
+++ b/contrib/librdkafka-cmake/config.h.in
@ -66,7 +66,7 @@
 #cmakedefine WITH_SASL_OAUTHBEARER 1
 #cmakedefine WITH_SASL_CYRUS 1
 // crc32chw
-#if !defined(__PPC__) && (!defined(__aarch64__) || defined(__ARM_FEATURE_CRC32))
+#if !defined(__PPC__) && (!defined(__aarch64__) || defined(__ARM_FEATURE_CRC32)) && !(defined(__aarch64__) && defined(__APPLE__))
 #define WITH_CRC32C_HW 1
 #endif
 // regex
--- a/contrib/openldap-cmake/darwin_aarch64/include/lber_types.h
+++ b/contrib/openldap-cmake/darwin_aarch64/include/lber_types.h
@ -0,0 +1,63 @@
+/* include/lber_types.h. Generated from lber_types.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * LBER types
+ */
+
+#ifndef _LBER_TYPES_H
+#define _LBER_TYPES_H
+
+#include <ldap_cdefs.h>
+
+LDAP_BEGIN_DECL
+
+/* LBER boolean, enum, integers (32 bits or larger) */
+#define LBER_INT_T int
+
+/* LBER tags (32 bits or larger) */
+#define LBER_TAG_T long
+
+/* LBER socket descriptor */
+#define LBER_SOCKET_T int
+
+/* LBER lengths (32 bits or larger) */
+#define LBER_LEN_T long
+
+/* ------------------------------------------------------------ */
+
+/* booleans, enumerations, and integers */
+typedef LBER_INT_T ber_int_t;
+
+/* signed and unsigned versions */
+typedef signed LBER_INT_T ber_sint_t;
+typedef unsigned LBER_INT_T ber_uint_t;
+
+/* tags */
+typedef unsigned LBER_TAG_T ber_tag_t;
+
+/* "socket" descriptors */
+typedef LBER_SOCKET_T ber_socket_t;
+
+/* lengths */
+typedef unsigned LBER_LEN_T ber_len_t;
+
+/* signed lengths */
+typedef signed LBER_LEN_T ber_slen_t;
+
+LDAP_END_DECL
+
+#endif /* _LBER_TYPES_H */
--- a/contrib/openldap-cmake/darwin_aarch64/include/ldap_config.h
+++ b/contrib/openldap-cmake/darwin_aarch64/include/ldap_config.h
@ -0,0 +1,74 @@
+/* include/ldap_config.h. Generated from ldap_config.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * This file works in conjunction with OpenLDAP configure system.
+ * If you do no like the values below, adjust your configure options.
+ */
+
+#ifndef _LDAP_CONFIG_H
+#define _LDAP_CONFIG_H
+
+/* directory separator */
+#ifndef LDAP_DIRSEP
+#ifndef _WIN32
+#define LDAP_DIRSEP "/"
+#else
+#define LDAP_DIRSEP "\\"
+#endif
+#endif
+
+/* directory for temporary files */
+#if defined(_WIN32)
+# define LDAP_TMPDIR "C:\\." /* we don't have much of a choice */
+#elif defined( _P_tmpdir )
+# define LDAP_TMPDIR _P_tmpdir
+#elif defined( P_tmpdir )
+# define LDAP_TMPDIR P_tmpdir
+#elif defined( _PATH_TMPDIR )
+# define LDAP_TMPDIR _PATH_TMPDIR
+#else
+# define LDAP_TMPDIR LDAP_DIRSEP "tmp"
+#endif
+
+/* directories */
+#ifndef LDAP_BINDIR
+#define LDAP_BINDIR        "/tmp/ldap-prefix/bin"
+#endif
+#ifndef LDAP_SBINDIR
+#define LDAP_SBINDIR       "/tmp/ldap-prefix/sbin"
+#endif
+#ifndef LDAP_DATADIR
+#define LDAP_DATADIR       "/tmp/ldap-prefix/share/openldap"
+#endif
+#ifndef LDAP_SYSCONFDIR
+#define LDAP_SYSCONFDIR    "/tmp/ldap-prefix/etc/openldap"
+#endif
+#ifndef LDAP_LIBEXECDIR
+#define LDAP_LIBEXECDIR    "/tmp/ldap-prefix/libexec"
+#endif
+#ifndef LDAP_MODULEDIR
+#define LDAP_MODULEDIR     "/tmp/ldap-prefix/libexec/openldap"
+#endif
+#ifndef LDAP_RUNDIR
+#define LDAP_RUNDIR        "/tmp/ldap-prefix/var"
+#endif
+#ifndef LDAP_LOCALEDIR
+#define LDAP_LOCALEDIR     ""
+#endif
+
+
+#endif /* _LDAP_CONFIG_H */
--- a/contrib/openldap-cmake/darwin_aarch64/include/ldap_features.h
+++ b/contrib/openldap-cmake/darwin_aarch64/include/ldap_features.h
@ -0,0 +1,61 @@
+/* include/ldap_features.h. Generated from ldap_features.hin by configure. */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2020 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+/*
+ * LDAP Features
+ */
+
+#ifndef _LDAP_FEATURES_H
+#define _LDAP_FEATURES_H 1
+
+/* OpenLDAP API version macros */
+#define LDAP_VENDOR_VERSION 20501
+#define LDAP_VENDOR_VERSION_MAJOR 2
+#define LDAP_VENDOR_VERSION_MINOR 5
+#define LDAP_VENDOR_VERSION_PATCH X
+
+/*
+** WORK IN PROGRESS!
+**
+** OpenLDAP reentrancy/thread-safeness should be dynamically
+** checked using ldap_get_option().
+**
+** The -lldap implementation is not thread-safe.
+**
+** The -lldap_r implementation is:
+**     LDAP_API_FEATURE_THREAD_SAFE (basic thread safety)
+** but also be:
+**     LDAP_API_FEATURE_SESSION_THREAD_SAFE
+**     LDAP_API_FEATURE_OPERATION_THREAD_SAFE
+**
+** The preprocessor flag LDAP_API_FEATURE_X_OPENLDAP_THREAD_SAFE
+** can be used to determine if -lldap_r is available at compile
+** time.  You must define LDAP_THREAD_SAFE if and only if you
+** link with -lldap_r.
+**
+** If you fail to define LDAP_THREAD_SAFE when linking with
+** -lldap_r or define LDAP_THREAD_SAFE when linking with -lldap,
+** provided header definitions and declarations may be incorrect.
+**
+*/
+
+/* is -lldap_r available or not */
+#define LDAP_API_FEATURE_X_OPENLDAP_THREAD_SAFE 1
+
+/* LDAP v2 Referrals */
+/* #undef LDAP_API_FEATURE_X_OPENLDAP_V2_REFERRALS */
+
+#endif /* LDAP_FEATURES */
--- a/contrib/openldap-cmake/darwin_aarch64/include/portable.h
+++ b/contrib/openldap-cmake/darwin_aarch64/include/portable.h
--- a/contrib/poco
+++ b/contrib/poco
@ -1 +1 @@
-Subproject commit 83beecccb09eec0c9fd2669cacea03ede1d9f138
+Subproject commit b7d9ec16ee33ca76643d5fcd907ea9a33285640a
--- a/contrib/poco-cmake/Foundation/CMakeLists.txt
+++ b/contrib/poco-cmake/Foundation/CMakeLists.txt
@ -233,3 +233,10 @@ else ()

    message (STATUS "Using Poco::Foundation: ${LIBRARY_POCO_FOUNDATION} ${INCLUDE_POCO_FOUNDATION}")
 endif ()
+
+if(OS_DARWIN AND ARCH_AARCH64)
+    target_compile_definitions (_poco_foundation
+        PRIVATE
+            POCO_NO_STAT64
+    )
+endif()
--- a/contrib/rocksdb-cmake/CMakeLists.txt
+++ b/contrib/rocksdb-cmake/CMakeLists.txt
@ -142,14 +142,14 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
  endif(HAS_ALTIVEC)
 endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")

-if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
  CHECK_C_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC)
  if(HAS_ARMV8_CRC)
    message(STATUS " HAS_ARMV8_CRC yes")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
  endif(HAS_ARMV8_CRC)
-endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")


 include(CheckCXXSourceCompiles)
--- a/contrib/zlib-ng
+++ b/contrib/zlib-ng
@ -1 +1 @@
-Subproject commit b82d3497a5afc46dec3c5d07e4b163b169f251d7
+Subproject commit 7f254522fd676ff4e906c6d4e9b30d4df4214c2d
--- a/debian/changelog
+++ b/debian/changelog
@ -1,5 +1,5 @@
-clickhouse (21.5.1.1) unstable; urgency=low
+clickhouse (21.6.1.1) unstable; urgency=low

  * Modified source code

- -- clickhouse-release <clickhouse-release@yandex-team.ru>  Fri, 02 Apr 2021 18:34:26 +0300
+ -- clickhouse-release <clickhouse-release@yandex-team.ru>  Tue, 20 Apr 2021 01:48:16 +0300
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*

 RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:20.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*
 ARG gosu_ver=1.10

 # set non-empty deb_location_url url to create a docker image
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@ -1,7 +1,7 @@
 FROM ubuntu:18.04

 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.5.1.*
+ARG version=21.6.1.*

 RUN apt-get update && \
    apt-get install -y apt-transport-https dirmngr && \
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -367,6 +367,9 @@ function run_tests
        # JSON functions
        01666_blns

+        # Requires postgresql-client
+        01802_test_postgresql_protocol_with_row_policy
+
        # Depends on AWS
        01801_s3_cluster
    )
--- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
+++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
@ -1,6 +1,7 @@
 <yandex>
    <http_port remove="remove"/>
    <mysql_port remove="remove"/>
+    <postgresql_port remove="remove"/>
    <interserver_http_port remove="remove"/>
    <tcp_with_proxy_port remove="remove"/>
    <keeper_server remove="remove"/>
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -66,7 +66,12 @@ reportStageEnd('parse')
 subst_elems = root.findall('substitutions/substitution')
 available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
 for e in subst_elems:
-    available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
+    name = e.find('name').text
+    values = [v.text for v in e.findall('values/value')]
+    if not values:
+        raise Exception(f'No values given for substitution {{{name}}}')
+
+    available_parameters[name] = values

 # Takes parallel lists of templates, substitutes them with all combos of
 # parameters. The set of parameters is determined based on the first list.
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@ -21,14 +21,14 @@ function start()
        -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \
        --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \
        --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
-        --mysql_port 19004 \
+        --mysql_port 19004 --postgresql_port 19005 \
        --keeper_server.tcp_port 19181 --keeper_server.server_id 2

        sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server2/config.xml --daemon \
        -- --path /var/lib/clickhouse2/ --logger.stderr /var/log/clickhouse-server/stderr2.log \
        --logger.log /var/log/clickhouse-server/clickhouse-server2.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server2.err.log \
        --tcp_port 29000 --tcp_port_secure 29440 --http_port 28123 --https_port 28443 --interserver_http_port 29009 --tcp_with_proxy_port 29010 \
-        --mysql_port 29004 \
+        --mysql_port 29004 --postgresql_port 29005 \
        --keeper_server.tcp_port 29181 --keeper_server.server_id 3
    fi

--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -28,7 +28,8 @@ RUN apt-get update -y \
            tree \
            unixodbc \
            wget \
-            mysql-client=5.7*
+            mysql-client=5.7* \
+            postgresql-client

 RUN pip3 install numpy scipy pandas

--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -44,7 +44,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
    -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \
    --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \
    --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
-    --mysql_port 19004 \
+    --mysql_port 19004 --postgresql_port 19005 \
    --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
    --macros.replica r2   # It doesn't work :(

@ -52,7 +52,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
    -- --path /var/lib/clickhouse2/ --logger.stderr /var/log/clickhouse-server/stderr2.log \
    --logger.log /var/log/clickhouse-server/clickhouse-server2.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server2.err.log \
    --tcp_port 29000 --tcp_port_secure 29440 --http_port 28123 --https_port 28443 --interserver_http_port 29009 --tcp_with_proxy_port 29010 \
-    --mysql_port 29004 \
+    --mysql_port 29004 --postgresql_port 29005 \
    --keeper_server.tcp_port 29181 --keeper_server.server_id 3 \
    --macros.shard s2   # It doesn't work :(

--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@ -5,12 +5,13 @@ toc_title: Build on Mac OS X

 # How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}

-Build should work on x86_64 (Intel) based macOS 10.15 (Catalina) and higher with recent Xcode's native AppleClang, or Homebrew's vanilla Clang or GCC compilers.
+Build should work on x86_64 (Intel) and arm64 (Apple Silicon) based macOS 10.15 (Catalina) and higher with recent Xcode's native AppleClang, or Homebrew's vanilla Clang or GCC compilers.

 ## Install Homebrew {#install-homebrew}

 ``` bash
-$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+# ...and follow the printed instructions on any additional steps required to complete the installation.
 ```

 ## Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools}
@ -22,8 +23,8 @@ Open it at least once to accept the end-user license agreement and automatically
 Then, make sure that the latest Comman Line Tools are installed and selected in the system:

 ``` bash
-$ sudo rm -rf /Library/Developer/CommandLineTools
-$ sudo xcode-select --install
+sudo rm -rf /Library/Developer/CommandLineTools
+sudo xcode-select --install
 ```

 Reboot.
@ -31,14 +32,15 @@ Reboot.
 ## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}

 ``` bash
-$ brew update
-$ brew install cmake ninja libtool gettext llvm gcc
+brew update
+brew install cmake ninja libtool gettext llvm gcc
 ```

 ## Checkout ClickHouse Sources {#checkout-clickhouse-sources}

 ``` bash
-$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://github.com/ClickHouse/ClickHouse.git
+git clone --recursive git@github.com:ClickHouse/ClickHouse.git
+# ...alternatively, you can use https://github.com/ClickHouse/ClickHouse.git as the repo URL.
 ```

 ## Build ClickHouse {#build-clickhouse}
@ -46,37 +48,37 @@ $ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://gi
 To build using Xcode's native AppleClang compiler:

 ``` bash
-$ cd ClickHouse
-$ rm -rf build
-$ mkdir build
-$ cd build
-$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
-$ cmake --build . --config RelWithDebInfo
-$ cd ..
+cd ClickHouse
+rm -rf build
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
+cmake --build . --config RelWithDebInfo
+cd ..
 ```

 To build using Homebrew's vanilla Clang compiler:

 ``` bash
-$ cd ClickHouse
-$ rm -rf build
-$ mkdir build
-$ cd build
-$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
-$ cmake --build . --config RelWithDebInfo
-$ cd ..
+cd ClickHouse
+rm -rf build
+mkdir build
+cd build
+cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
+cmake --build . --config RelWithDebInfo
+cd ..
 ```

 To build using Homebrew's vanilla GCC compiler:

 ``` bash
-$ cd ClickHouse
-$ rm -rf build
-$ mkdir build
-$ cd build
-$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
-$ cmake --build . --config RelWithDebInfo
-$ cd ..
+cd ClickHouse
+rm -rf build
+mkdir build
+cd build
+cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
+cmake --build . --config RelWithDebInfo
+cd ..
 ```

 ## Caveats {#caveats}
@ -115,7 +117,7 @@ To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the
 Execute the following command:

 ``` bash
-$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
+sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
 ```

 Reboot.
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@ -90,6 +90,7 @@ The following settings can be specified in configuration file for given endpoint
 -   `endpoint` — Specifies prefix of an endpoint. Mandatory.
 -   `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
 -   `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
+-   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
 -   `header` —  Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times.
 -   `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.

@ -102,11 +103,13 @@ The following settings can be specified in configuration file for given endpoint
        <!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
        <!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
        <!-- <use_environment_credentials>false</use_environment_credentials> -->
+        <!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
        <!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
        <!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
    </endpoint-name>
 </s3>
 ```
+
 ## Usage {#usage-examples}

 Suppose we have several files in TSV format with the following URIs on HDFS:
@ -149,6 +152,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p
 CREATE TABLE big_table (name String, value UInt32) 
 ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
 ```
+
 ## See also

 -  [S3 table function](../../../sql-reference/table-functions/s3.md)
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -767,6 +767,7 @@ Required parameters:

 Optional parameters:    
 -   `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
+-   `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
 -   `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. 
 -   `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. 
 -   `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. 
--- a/docs/en/engines/table-engines/special/buffer.md
+++ b/docs/en/engines/table-engines/special/buffer.md
@ -18,11 +18,17 @@ Engine parameters:
 -   `num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
 -   `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` – Conditions for flushing data from the buffer.

+Optional engine parameters:
+
+-   `flush_time`, `flush_rows`, `flush_bytes` – Conditions for flushing data from the buffer, that will happen only in background (ommited or zero means no `flush*` parameters).
+
 Data is flushed from the buffer and written to the destination table if all the `min*` conditions or at least one `max*` condition are met.

-   `min_time`, `max_time` – Condition for the time in seconds from the moment of the first write to the buffer.
-   `min_rows`, `max_rows` – Condition for the number of rows in the buffer.
-   `min_bytes`, `max_bytes` – Condition for the number of bytes in the buffer.
+Also if at least one `flush*` condition are met flush initiated in background, this is different from `max*`, since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` (into `Buffer`) queries.
+
+-   `min_time`, `max_time`, `flush_time` – Condition for the time in seconds from the moment of the first write to the buffer.
+-   `min_rows`, `max_rows`, `flush_rows` – Condition for the number of rows in the buffer.
+-   `min_bytes`, `max_bytes`, `flush_bytes` – Condition for the number of bytes in the buffer.

 During the write operation, data is inserted to a `num_layers` number of random buffers. Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer.

--- a/docs/en/guides/apply-catboost-model.md
+++ b/docs/en/guides/apply-catboost-model.md
@ -159,6 +159,9 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.<so|dl
 <models_config>/home/catboost/models/*_model.xml</models_config>
 ```

+!!! note "Note"
+    You can change path to the CatBoost model configuration later without restarting server.
+
 ## 4. Run the Model Inference from SQL {#run-model-inference}

 For test model run the ClickHouse client `$ clickhouse client`.
--- a/docs/en/interfaces/third-party/gui.md
+++ b/docs/en/interfaces/third-party/gui.md
@ -169,24 +169,21 @@ Features:

 ### SeekTable {#seektable}

-[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. SeekTable is available both as a cloud service and a self-hosted version. SeekTable reports may be embedded into any web-app.
+[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. It is available both as a cloud service and a self-hosted version. Reports from SeekTable may be embedded into any web-app.

 Features:

 -   Business users-friendly reports builder.
 -   Powerful report parameters for SQL filtering and report-specific query customizations.
 -   Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers).
-   It is possible to use all power of CH SQL dialect in dimensions/measures definitions
+-   It is possible to use all power of ClickHouse SQL dialect in dimensions/measures definitions.
 -   [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation.
-   Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore), data models (cubes) / reports configuration is a human-readable XML and can be stored under version control.
+-   Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored under version control system.

 SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual usage.

 [How to configure ClickHouse connection in SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)

-
 ### Chadmin {#chadmin}

 [Chadmin](https://github.com/bun4uk/chadmin) is a simple UI where you can visualize your currently running queries on your ClickHouse cluster and info about them and kill them if you want.
-
-[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/gui/) <!--hide-->
--- a/docs/en/operations/system-tables/columns.md
+++ b/docs/en/operations/system-tables/columns.md
@ -4,7 +4,9 @@ Contains information about columns in all the tables.

 You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.

-The `system.columns` table contains the following columns (the column type is shown in brackets):
+Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field. 
+
+Columns:

 -   `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -26,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
 **Example**

 ```sql
-:) select * from system.columns LIMIT 2 FORMAT Vertical;
+SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
 ```

 ```text
@ -65,8 +67,6 @@ is_in_sorting_key:       0
 is_in_primary_key:       0
 is_in_sampling_key:      0
 compression_codec:       
-
-2 rows in set. Elapsed: 0.002 sec. 
 ```

 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->
--- a/docs/en/operations/system-tables/tables.md
+++ b/docs/en/operations/system-tables/tables.md
@ -1,59 +1,65 @@
 # system.tables {#system-tables}

-Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
+Contains metadata of each table that the server knows about. 

-This table contains the following columns (the column type is shown in brackets):
+[Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.

-   `database` (String) — The name of the database the table is in.
+[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.tables` only in those session where they have been created. They are shown with the empty `database` field and with the `is_temporary` flag switched on. 

-   `name` (String) — Table name.
+Columns:

-   `engine` (String) — Table engine name (without parameters).
+-   `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. 

-   `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
+-   `name` ([String](../../sql-reference/data-types/string.md)) — Table name. 

-   `data_path` (String) - Path to the table data in the file system.
+-   `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). 

-   `metadata_path` (String) - Path to the table metadata in the file system.
+-   `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. 

-   `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
+-   `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system. 

-   `dependencies_database` (Array(String)) - Database dependencies.
+-   `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. 

-   `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
+-   `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata.

-   `create_table_query` (String) - The query that was used to create the table.
+-   `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.

-   `engine_full` (String) - Parameters of the table engine.
+-   `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).

-   `partition_key` (String) - The partition key expression specified in the table.
+-   `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.

-   `sorting_key` (String) - The sorting key expression specified in the table.
+-   `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. 

-   `primary_key` (String) - The primary key expression specified in the table.
+-   `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. 

-   `sampling_key` (String) - The sampling key expression specified in the table.
+-   `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. 

-   `storage_policy` (String) - The storage policy:
+-   `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. 
+
+-   `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. 
+
+-   `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy:

    -   [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    -   [Distributed](../../engines/table-engines/special/distributed.md#distributed)

-   `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `Null` (including underying `Buffer` table).
+-   `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table). 

-   `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `Null` (**does not** includes any underlying storage).
+-   `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not includes any underlying storage). 

    -   If the table stores data on disk, returns used space on disk (i.e. compressed).
    -   If the table stores data in memory, returns approximated number of used bytes in memory.

-   `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
+-   `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). 

-   `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
+-   `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). 

 The `system.tables` table is used in `SHOW TABLES` query implementation.

+**Example**
+
 ```sql
-:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
+SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
 ```

 ```text
@ -100,8 +106,6 @@ sampling_key:
 storage_policy:             
 total_rows:                 ᴺᵁᴸᴸ
 total_bytes:                ᴺᵁᴸᴸ
-
-2 rows in set. Elapsed: 0.004 sec. 
 ```

 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
@ -6,7 +6,7 @@ toc_priority: 207

 Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm.

-The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
+Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.

 The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.

--- a/docs/en/sql-reference/functions/json-functions.md
+++ b/docs/en/sql-reference/functions/json-functions.md
@ -16,46 +16,60 @@ The following assumptions are made:

 ## visitParamHas(params, name) {#visitparamhasparams-name}

-Checks whether there is a field with the ‘name’ name.
+Checks whether there is a field with the `name` name.
+
+Alias: `simpleJSONHas`.

 ## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}

-Parses UInt64 from the value of the field named ‘name’. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
+Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn’t exist, or it exists but doesn’t contain a number, it returns 0.
+
+Alias: `simpleJSONExtractUInt`.

 ## visitParamExtractInt(params, name) {#visitparamextractintparams-name}

 The same as for Int64.

+Alias: `simpleJSONExtractInt`.
+
 ## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}

 The same as for Float64.

+Alias: `simpleJSONExtractFloat`.
+
 ## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}

 Parses a true/false value. The result is UInt8.

+Alias: `simpleJSONExtractBool`.
+
 ## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}

 Returns the value of a field, including separators.

+Alias: `simpleJSONExtractRaw`.
+
 Examples:

 ``` sql
-visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
-visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
+visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
+visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
 ```

 ## visitParamExtractString(params, name) {#visitparamextractstringparams-name}

 Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.

+Alias: `simpleJSONExtractString`.
+
 Examples:

 ``` sql
-visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
-visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
-visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
-visitParamExtractString('{"abc":"hello}', 'abc') = ''
+visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
+visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
+visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
+visitParamExtractString('{"abc":"hello}', 'abc') = '';
 ```

 There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -74,6 +74,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,

 Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.

+!!! warning "Warning"
+    You can’t delete a column if it is referenced by [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, it returns an error.
+
 Example:

 ``` sql
@ -180,7 +183,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
 ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 ```

-## See Also
+**See Also**

 - [REMOVE TTL](ttl.md).

--- a/docs/en/sql-reference/statements/create/row-policy.md
+++ b/docs/en/sql-reference/statements/create/row-policy.md
@ -5,39 +5,81 @@ toc_title: ROW POLICY

 # CREATE ROW POLICY {#create-row-policy-statement}

-Creates [filters for rows](../../../operations/access-rights.md#row-policy-management), which a user can read from a table.
+Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table.

 Syntax:

 ``` sql
 CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1 
        [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...] 
+    [FOR SELECT] USING condition
    [AS {PERMISSIVE | RESTRICTIVE}]
-    [FOR SELECT]
-    [USING condition]
    [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
 ```

-`ON CLUSTER` clause allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
+## USING Clause {#create-row-policy-using}

-## AS Clause {#create-row-policy-as}
-
-Using this section you can create permissive or restrictive policies.
-
-Permissive policy grants access to rows. Permissive policies which apply to the same table are combined together using the boolean `OR` operator. Policies are permissive by default.
-
-Restrictive policy restricts access to rows. Restrictive policies which apply to the same table are combined together using the boolean `AND` operator.
-
-Restrictive policies apply to rows that passed the permissive filters. If you set restrictive policies but no permissive policies, the user can’t get any row from the table.
+Allows to specify a condition to filter rows. An user will see a row if the condition is calculated to non-zero for the row.

 ## TO Clause {#create-row-policy-to}

-In the section `TO` you can provide a mixed list of roles and users, for example, `CREATE ROW POLICY ... TO accountant, john@localhost`.
+In the section `TO` you can provide a list of users and roles this policy should work for. For example, `CREATE ROW POLICY ... TO accountant, john@localhost`.

-Keyword `ALL` means all the ClickHouse users including current user. Keywords `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`
+Keyword `ALL` means all the ClickHouse users including current user. Keyword `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`

-## Examples {#examples}
+!!! note "Note"
+    If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy
    
-`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost`
+    `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter`

-`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira`
+    forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all.
+    
+    If that's not desirable it can't be fixed by adding one more row policy, like the following:
+
+    `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter`
+
+## AS Clause {#create-row-policy-as}
+
+It's allowed to have more than one policy enabled on the same table for the same user at the one time. So we need a way to combine the conditions from multiple policies.
+
+By default policies are combined using the boolean `OR` operator. For example, the following policies
+
+``` sql
+CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
+CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 TO peter, antonio
+```
+
+enables the user `peter` to see rows with either `b=1` or `c=2`.
+
+The `AS` clause specifies how policies should be combined with other policies. Policies can be either permissive or restrictive. By default policies are permissive, which means they are combined using the boolean `OR` operator.
+
+A policy can be defined as restrictive as an alternative. Restrictive policies are combined using the boolean `AND` operator.
+
+Here is the general formula:
+
+```
+row_is_visible = (one or more of the permissive policies' conditions are non-zero) AND
+                 (all of the restrictive policies's conditions are non-zero)
+```
+
+For example, the following policies
+
+``` sql
+CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
+CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
+```
+
+enables the user `peter` to see rows only if both `b=1` AND `c=2`.
+
+## ON CLUSTER Clause {#create-row-policy-on-cluster}
+
+Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
+
+
+## Examples
+
+`CREATE ROW POLICY filter1 ON mydb.mytable USING a<1000 TO accountant, john@localhost`
+
+`CREATE ROW POLICY filter2 ON mydb.mytable USING a<1000 AND b=5 TO ALL EXCEPT mira`
+
+`CREATE ROW POLICY filter3 ON mydb.mytable USING 1 TO admin`
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@ -50,15 +50,32 @@ Creates a table with the same result as that of the [table function](../../../sq
 ### From SELECT query {#from-select-query}

 ``` sql
-CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
+CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
 ```

-Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from SELECT.
+Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from `SELECT`. Also you can explicitly specify columns description.

-In all cases, if `IF NOT EXISTS` is specified, the query won’t return an error if the table already exists. In this case, the query won’t do anything.
+If the table already exists and `IF NOT EXISTS` is specified, the query won’t do anything.

 There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines).

+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
+SELECT x, toTypeName(x) FROM t1;
+```
+
+Result:
+
+```text
+┌─x─┬─toTypeName(x)─┐
+│ 1 │ String        │
+└───┴───────────────┘
+```
+
 ## NULL Or NOT NULL Modifiers {#null-modifiers}

 `NULL` and `NOT NULL` modifiers after data type in column definition allow or do not allow it to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@ -91,7 +91,7 @@ Hierarchy of privileges:
            -   `ALTER ADD CONSTRAINT`
            -   `ALTER DROP CONSTRAINT`
        -   `ALTER TTL`
-        -   `ALTER MATERIALIZE TTL`
+            -   `ALTER MATERIALIZE TTL`
        -   `ALTER SETTINGS`
        -   `ALTER MOVE PARTITION`
        -   `ALTER FETCH PARTITION`
@ -102,9 +102,9 @@ Hierarchy of privileges:
 -   [CREATE](#grant-create)
    -   `CREATE DATABASE`
    -   `CREATE TABLE`
+        -   `CREATE TEMPORARY TABLE`
    -   `CREATE VIEW`
    -   `CREATE DICTIONARY`
-    -   `CREATE TEMPORARY TABLE`
 -   [DROP](#grant-drop)
    -   `DROP DATABASE`
    -   `DROP TABLE`
@ -150,7 +150,7 @@ Hierarchy of privileges:
    -   `SYSTEM RELOAD`
        -   `SYSTEM RELOAD CONFIG`
        -   `SYSTEM RELOAD DICTIONARY`
-        -   `SYSTEM RELOAD EMBEDDED DICTIONARIES`
+            -   `SYSTEM RELOAD EMBEDDED DICTIONARIES`
    -   `SYSTEM MERGES`
    -   `SYSTEM TTL MERGES`
    -   `SYSTEM FETCHES`
@ -276,7 +276,7 @@ Allows executing [ALTER](../../sql-reference/statements/alter/index.md) queries
            -   `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT`
            -   `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT`
        -   `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL`
-        -   `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL`
+            -   `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL`
        -   `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING`
        -   `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART`
        -   `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART`
@ -304,9 +304,9 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
 -   `CREATE`. Level: `GROUP`
    -   `CREATE DATABASE`. Level: `DATABASE`
    -   `CREATE TABLE`. Level: `TABLE`
+        -   `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
    -   `CREATE VIEW`. Level: `VIEW`
    -   `CREATE DICTIONARY`. Level: `DICTIONARY`
-    -   `CREATE TEMPORARY TABLE`. Level: `GLOBAL`

 **Notes**

@ -401,7 +401,7 @@ Allows a user to execute [SYSTEM](../../sql-reference/statements/system.md) quer
    -   `SYSTEM RELOAD`. Level: `GROUP`
        -   `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG`
        -   `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES`
-        -   `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: R`ELOAD EMBEDDED DICTIONARIES`
+            -   `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: `RELOAD EMBEDDED DICTIONARIES`
    -   `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES`
    -   `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES`
    -   `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES`
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@ -5,13 +5,18 @@ toc_title: OPTIMIZE

 # OPTIMIZE Statement {#misc_operations-optimize}

+This query tries to initialize an unscheduled merge of data parts for tables.
+
+!!! warning "Warning"
+    `OPTIMIZE` can’t fix the `Too many parts` error.
+
+**Syntax**
+
 ``` sql
 OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
 ```

-This query tries to initialize an unscheduled merge of data parts for tables with a table engine from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family.
-
-The `OPTMIZE` query is also supported for the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.
+The `OPTMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported.

 When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled).

@ -21,12 +26,13 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin
 -   If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.


-### BY expression {#by-expression}
+## BY expression {#by-expression}

 If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).

-Note that `*` behaves just like in `SELECT`: `MATERIALIZED`, and `ALIAS` columns are not used for expansion.
-Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an ALIAS column.
+!!! note "Note"
+    Notice that `*` behaves just like in `SELECT`: `MATERIALIZED` and `ALIAS` columns are not used for expansion.
+    Also, it is an error to specify empty list of columns, or write an expression that results in an empty list of columns, or deduplicate by an ALIAS column.

 ``` sql
 OPTIMIZE TABLE table DEDUPLICATE; -- the old one
@ -39,9 +45,10 @@ OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT co
 OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT (colX, colY);
 ```

-**Example:**
+**Examples**
+
+Create a table:

-A silly synthetic table.
 ``` sql
 CREATE TABLE example (
    primary_key Int32,
@ -56,31 +63,31 @@ PARTITION BY partition_key
 ORDER BY (primary_key, secondary_key);
 ```

+The 'old' deduplicate, all columns are taken into account, i.e. row is removed only if all values in all columns are equal to corresponding values in previous row.
+
 ``` sql
-- The 'old' deduplicate, all columns are taken into account, i.e. row is removed only if all values in all columns are equal to corresponding values in previous row.
 OPTIMIZE TABLE example FINAL DEDUPLICATE;
 ```

+Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
+
 ``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
 ```

+Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
+
 ``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
 ```

+Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
 ``` sql
-- Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
 ```

+Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
+
 ``` sql
-- Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
 OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
 ```
-
-
-!!! warning "Warning"
-    `OPTIMIZE` can’t fix the “Too many parts” error.
--- a/docs/ru/commercial/cloud.md
+++ b/docs/ru/commercial/cloud.md
@ -40,3 +40,19 @@ toc_title: "Поставщики облачных услуг ClickHouse"
 -   полная интеграция с облачными системами логирования, базами данных и инструментами обработки данных;
 -   встроенная платформа для мониторинга и управления базами данных;
 -   техническая поддержка от экспертов по работе с базами данных.
+
+## SberCloud {#sbercloud}
+
+[Облачная платформа SberCloud.Advanced](https://sbercloud.ru/ru/advanced):
+
+-   предоставляет более 50 высокотехнологичных сервисов;
+-   позволяет быстро создавать и эффективно управлять ИТ-инфраструктурой, приложениями и интернет-сервисами;
+-   радикально минимизирует ресурсы, требуемые для работы корпоративных ИТ-систем;
+-   в разы сокращает время вывода новых продуктов на рынок.
+
+SberCloud.Advanced предоставляет [MapReduce Service (MRS)](https://docs.sbercloud.ru/mrs/ug/topics/ug__clickhouse.html) — надежную, безопасную и простую в использовании платформу корпоративного уровня для хранения, обработки и анализа больших данных. MRS позволяет быстро создавать и управлять кластерами ClickHouse.
+
+-   Инстанс ClickHouse состоит из трех узлов ZooKeeper и нескольких узлов ClickHouse. Выделенный режим реплики используется для обеспечения высокой надежности двойных копий данных.
+-   MRS предлагает возможности гибкого масштабирования при быстром росте сервисов в сценариях, когда емкости кластерного хранилища или вычислительных ресурсов процессора недостаточно. MRS в один клик предоставляет инструмент для балансировки данных при расширении узлов ClickHouse в кластере. Вы можете определить режим и время балансировки данных на основе характеристик сервиса, чтобы обеспечить доступность сервиса.
+-   MRS использует архитектуру развертывания высокой доступности на основе Elastic Load Balance (ELB) — сервиса для автоматического распределения трафика на несколько внутренних узлов. Благодаря ELB, данные записываются в локальные таблицы и считываются из распределенных таблиц на разных узлах. Такая архитектура повышает отказоустойчивость кластера и гарантирует высокую доступность приложений.
+
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -753,7 +753,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

 Необязательные параметры:   

-   `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`.
+-   `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из сетевого окружения, а также из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`.
+-   `use_insecure_imds_request` — признак, нужно ли использовать менее безопасное соединение при выполнении запроса к IMDS при получении учётных данных из метаданных Amazon EC2. Значение по умолчанию: `false`.
 -   `proxy` — конфигурация прокси-сервера для конечной точки S3. Каждый элемент `uri` внутри блока `proxy` должен содержать URL прокси-сервера. 
 -   `connect_timeout_ms` — таймаут подключения к сокету в миллисекундах. Значение по умолчанию: 10 секунд. 
 -   `request_timeout_ms` — таймаут выполнения запроса в миллисекундах. Значение по умолчанию: 5 секунд. 
--- a/docs/ru/guides/apply-catboost-model.md
+++ b/docs/ru/guides/apply-catboost-model.md
@ -158,6 +158,8 @@ FROM amazon_train
 <catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
 <models_config>/home/catboost/models/*_model.xml</models_config>
 ```
+!!! note "Примечание"
+    Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
    
 ## 4. Запустите вывод модели из SQL {#run-model-inference}

--- a/docs/ru/interfaces/cli.md
+++ b/docs/ru/interfaces/cli.md
@ -121,6 +121,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 -   `--user, -u` — имя пользователя, по умолчанию — ‘default’.
 -   `--password` — пароль, по умолчанию — пустая строка.
 -   `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме.
+-   `--queries-file, -qf` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
 -   `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД ‘default’).
 -   `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.
 -   `--multiquery, -n` — если указано — разрешить выполнять несколько запросов, разделённых точкой с запятой.
@ -130,6 +131,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 -   `--stacktrace` — если указано, в случае исключения, выводить также его стек-трейс.
 -   `--config-file` — имя конфигурационного файла.
 -   `--secure` — если указано, будет использован безопасный канал.
+-   `--history_file` - путь к файлу с историей команд.
 -   `--param_<name>` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).

 Начиная с версии 20.5, в `clickhouse-client` есть автоматическая подсветка синтаксиса (включена всегда).
--- a/docs/ru/interfaces/third-party/gui.md
+++ b/docs/ru/interfaces/third-party/gui.md
@ -166,4 +166,19 @@ toc_title: "Визуальные интерфейсы от сторонних р

 [Как сконфигурировать ClickHouse в Looker.](https://docs.looker.com/setup-and-management/database-config/clickhouse)

-[Original article](https://clickhouse.tech/docs/ru/interfaces/third-party/gui/) <!--hide-->
+### SeekTable {#seektable}
+
+[SeekTable](https://www.seektable.com) — это аналитический инструмент для самостоятельного анализа и обработки данных бизнес-аналитики. Он доступен как в виде облачного сервиса, так и в виде локальной версии. Отчеты из SeekTable могут быть встроены в любое веб-приложение.
+
+Основные возможности:
+
+-   Удобный конструктор отчетов.
+-   Гибкая настройка отчетов SQL и создание запросов для специфичных отчетов.
+-   Интегрируется с ClickHouse, используя собственную точку приема запроса TCP/IP или интерфейс HTTP(S) (два разных драйвера).
+-   Поддерживает всю мощь диалекта ClickHouse SQL для построения запросов по различным измерениям и показателям.
+-   [WEB-API](https://www.seektable.com/help/web-api-integration) для автоматизированной генерации отчетов.
+-   Процесс разработки отчетов поддерживает [резервное копирование/восстановление данных](https://www.seektable.com/help/self-hosted-backup-restore); конфигурация моделей данных (кубов) / отчетов представляет собой удобочитаемый XML-файл, который может храниться в системе контроля версий.
+
+SeekTable [бесплатен](https://www.seektable.com/help/cloud-pricing) для личного/индивидуального использования.
+
+[Как сконфигурировать подключение ClickHouse в SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)
--- a/docs/ru/operations/system-tables/columns.md
+++ b/docs/ru/operations/system-tables/columns.md
@ -4,7 +4,9 @@

 С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.

-Таблица `system.columns` содержит столбцы (тип столбца указан в скобках):
+Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое. 
+
+Cтолбцы:

 -   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
@ -23,3 +25,46 @@
 -   `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки.
 -   `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия.

+**Пример**
+
+```sql
+SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
+```
+
+```text
+Row 1:
+──────
+database:                system
+table:                   aggregate_function_combinators
+name:                    name
+type:                    String
+default_kind:            
+default_expression:      
+data_compressed_bytes:   0
+data_uncompressed_bytes: 0
+marks_bytes:             0
+comment:                 
+is_in_partition_key:     0
+is_in_sorting_key:       0
+is_in_primary_key:       0
+is_in_sampling_key:      0
+compression_codec:       
+
+Row 2:
+──────
+database:                system
+table:                   aggregate_function_combinators
+name:                    is_internal
+type:                    UInt8
+default_kind:            
+default_expression:      
+data_compressed_bytes:   0
+data_uncompressed_bytes: 0
+marks_bytes:             0
+comment:                 
+is_in_partition_key:     0
+is_in_sorting_key:       0
+is_in_primary_key:       0
+is_in_sampling_key:      0
+compression_codec:       
+```
--- a/docs/ru/operations/system-tables/tables.md
+++ b/docs/ru/operations/system-tables/tables.md
@ -1,39 +1,94 @@
 # system.tables {#system-tables}

-Содержит метаданные каждой таблицы, о которой знает сервер. Отсоединённые таблицы не отображаются в `system.tables`.
+Содержит метаданные каждой таблицы, о которой знает сервер. 

-Эта таблица содержит следующие столбцы (тип столбца показан в скобках):
+Отсоединённые таблицы ([DETACH](../../sql-reference/statements/detach.md)) не отображаются в `system.tables`.

-   `database String` — имя базы данных, в которой находится таблица.
-   `name` (String) — имя таблицы.
-   `engine` (String) — движок таблицы (без параметров).
-   `is_temporary` (UInt8) — флаг, указывающий на то, временная это таблица или нет.
-   `data_path` (String) — путь к данным таблицы в файловой системе.
-   `metadata_path` (String) — путь к табличным метаданным в файловой системе.
-   `metadata_modification_time` (DateTime) — время последней модификации табличных метаданных.
-   `dependencies_database` (Array(String)) — зависимости базы данных.
-   `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
-   `create_table_query` (String) — запрос, которым создавалась таблица.
-   `engine_full` (String) — параметры табличного движка.
-   `partition_key` (String) — ключ партиционирования таблицы.
-   `sorting_key` (String) — ключ сортировки таблицы.
-   `primary_key` (String) - первичный ключ таблицы.
-   `sampling_key` (String) — ключ сэмплирования таблицы.
-   `storage_policy` (String) - политика хранения данных:
+Информация о [временных таблицах](../../sql-reference/statements/create/table.md#temporary-tables) содержится в `system.tables` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких таблиц пустое, а флаг `is_temporary` включен. 
+
+Столбцы:
+
+-   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица.
+-   `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
+-   `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров).
+-   `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет.
+-   `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе.
+-   `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе.
+-   `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время последней модификации табличных метаданных.
+-   `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — зависимости базы данных.
+-   `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы).
+-   `create_table_query` ([String](../../sql-reference/data-types/string.md)) — запрос, при помощи которого создавалась таблица.
+-   `engine_full` ([String](../../sql-reference/data-types/string.md)) — параметры табличного движка.
+-   `partition_key` ([String](../../sql-reference/data-types/string.md)) — ключ партиционирования таблицы.
+-   `sorting_key` ([String](../../sql-reference/data-types/string.md)) — ключ сортировки таблицы.
+-   `primary_key` ([String](../../sql-reference/data-types/string.md)) - первичный ключ таблицы.
+-   `sampling_key` ([String](../../sql-reference/data-types/string.md)) — ключ сэмплирования таблицы.
+-   `storage_policy` ([String](../../sql-reference/data-types/string.md)) - политика хранения данных:

    -   [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    -   [Distributed](../../engines/table-engines/special/distributed.md#distributed)

-   `total_rows` (Nullable(UInt64)) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
+-   `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `NULL` (включая базовую таблицу `Buffer`).

-   `total_bytes` (Nullable(UInt64)) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
+-   `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `NULL` (не включает в себя никакого базового хранилища).

    -   Если таблица хранит данные на диске, возвращает используемое пространство на диске (т. е. сжатое).
    -   Если таблица хранит данные в памяти, возвращает приблизительное количество используемых байт в памяти.

-   `lifetime_rows` (Nullable(UInt64)) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
+-   `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).

-   `lifetime_bytes` (Nullable(UInt64)) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
+-   `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).

 Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.

+**Пример**
+
+```sql
+SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
+```
+
+```text
+Row 1:
+──────
+database:                   system
+name:                       aggregate_function_combinators
+uuid:                       00000000-0000-0000-0000-000000000000
+engine:                     SystemAggregateFunctionCombinators
+is_temporary:               0
+data_paths:                 []
+metadata_path:              /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
+metadata_modification_time: 1970-01-01 03:00:00
+dependencies_database:      []
+dependencies_table:         []
+create_table_query:         
+engine_full:                
+partition_key:              
+sorting_key:                
+primary_key:                
+sampling_key:               
+storage_policy:             
+total_rows:                 ᴺᵁᴸᴸ
+total_bytes:                ᴺᵁᴸᴸ
+
+Row 2:
+──────
+database:                   system
+name:                       asynchronous_metrics
+uuid:                       00000000-0000-0000-0000-000000000000
+engine:                     SystemAsynchronousMetrics
+is_temporary:               0
+data_paths:                 []
+metadata_path:              /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
+metadata_modification_time: 1970-01-01 03:00:00
+dependencies_database:      []
+dependencies_table:         []
+create_table_query:         
+engine_full:                
+partition_key:              
+sorting_key:                
+primary_key:                
+sampling_key:               
+storage_policy:             
+total_rows:                 ᴺᵁᴸᴸ
+total_bytes:                ᴺᵁᴸᴸ
+```
--- a/docs/ru/sql-reference/functions/json-functions.md
+++ b/docs/ru/sql-reference/functions/json-functions.md
@ -16,51 +16,65 @@ toc_title: JSON

 ## visitParamHas(params, name) {#visitparamhasparams-name}

-Проверить наличие поля с именем name.
+Проверяет наличие поля с именем `name`.
+
+Алиас: `simpleJSONHas`.

 ## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}

-Распарсить UInt64 из значения поля с именем name. Если поле строковое - попытаться распарсить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то вернуть 0.
+Пытается выделить число типа UInt64 из значения поля с именем `name`. Если поле строковое, пытается выделить число из начала строки. Если такого поля нет, или если оно есть, но содержит не число, то возвращает 0.
+
+Алиас: `simpleJSONExtractUInt`.

 ## visitParamExtractInt(params, name) {#visitparamextractintparams-name}

 Аналогично для Int64.

+Алиас: `simpleJSONExtractInt`.
+
 ## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}

 Аналогично для Float64.

+Алиас: `simpleJSONExtractFloat`.
+
 ## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}

-Распарсить значение true/false. Результат - UInt8.
+Пытается выделить значение true/false. Результат — UInt8.
+
+Алиас: `simpleJSONExtractBool`.

 ## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}

-Вернуть значение поля, включая разделители.
+Возвращает значение поля, включая разделители.
+
+Алиас: `simpleJSONExtractRaw`.

 Примеры:

 ``` sql
-visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
-visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
+visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
+visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
 ```

 ## visitParamExtractString(params, name) {#visitparamextractstringparams-name}

-Распарсить строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
+Разбирает строку в двойных кавычках. У значения убирается экранирование. Если убрать экранированные символы не удалось, то возвращается пустая строка.
+
+Алиас: `simpleJSONExtractString`.

 Примеры:

 ``` sql
-visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
-visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
-visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
-visitParamExtractString('{"abc":"hello}', 'abc') = ''
+visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
+visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
+visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
+visitParamExtractString('{"abc":"hello}', 'abc') = '';
 ```

-На данный момент, не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).
+На данный момент не поддерживаются записанные в формате `\uXXXX\uYYYY` кодовые точки не из basic multilingual plane (они переводятся не в UTF-8, а в CESU-8).

-Следующие функции используют [simdjson](https://github.com/lemire/simdjson) который разработан под более сложные требования для разбора JSON. Упомянутое выше предположение 2 по-прежнему применимо.
+Следующие функции используют [simdjson](https://github.com/lemire/simdjson), который разработан под более сложные требования для разбора JSON. Упомянутое выше допущение 2 по-прежнему применимо.

 ## isValidJSON(json) {#isvalidjsonjson}

@ -292,4 +306,3 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
 │ [('d','"hello"'),('f','"world"')]                                                                     │
 └───────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ```
-
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@ -63,6 +63,9 @@ DROP COLUMN [IF EXISTS] name

 Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно.

+!!! warning "Предупреждение"
+    Вы не можете удалить столбец, используемый в [материализованном представлениии](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка.
+
 Пример:

 ``` sql
@ -155,7 +158,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
 ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 ```

-## Смотрите также
+**Смотрите также**

 - [REMOVE TTL](ttl.md).

--- a/docs/ru/sql-reference/statements/create/row-policy.md
+++ b/docs/ru/sql-reference/statements/create/row-policy.md
@ -5,7 +5,7 @@ toc_title: "Политика доступа"

 # CREATE ROW POLICY {#create-row-policy-statement}

-Создает [фильтры для строк](../../../operations/access-rights.md#row-policy-management), которые пользователь может прочесть из таблицы.
+Создает [политики доступа к строкам](../../../operations/access-rights.md#row-policy-management), т.е. фильтры, которые определяют, какие строки пользователь может читать из таблицы.

 Синтаксис:

@ -13,33 +13,74 @@ toc_title: "Политика доступа"
 CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1 
        [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...] 
    [AS {PERMISSIVE | RESTRICTIVE}]
-    [FOR SELECT]
-    [USING condition]
+    [FOR SELECT] USING condition
    [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```

-Секция `ON CLUSTER` позволяет создавать фильтры для строк на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
+## Секция USING {#create-row-policy-using}

-## Секция AS {#create-row-policy-as}
-
-С помощью данной секции можно создать политику разрешения или ограничения.
-
-Политика разрешения предоставляет доступ к строкам. Разрешительные политики, которые применяются к одной таблице, объединяются с помощью логического оператора `OR`. Политики являются разрешительными по умолчанию.
-
-Политика ограничения запрещает доступ к строкам. Ограничительные политики, которые применяются к одной таблице, объединяются логическим оператором `AND`.
-
-Ограничительные политики применяются к строкам, прошедшим фильтр разрешительной политики. Если вы не зададите разрешительные политики, пользователь не сможет обращаться ни к каким строкам из таблицы.
+Секция `USING` указывает условие для фильтрации строк. Пользователь может видеть строку, если это условие, вычисленное для строки, дает ненулевой результат.

 ## Секция TO {#create-row-policy-to}

-В секции `TO` вы можете перечислить как роли, так и пользователей. Например, `CREATE ROW POLICY ... TO accountant, john@localhost`.
+В секции `TO` перечисляются пользователи и роли, для которых должна действовать политика. Например, `CREATE ROW POLICY ... TO accountant, john@localhost`.

 Ключевым словом `ALL` обозначаются все пользователи, включая текущего. Ключевые слова `ALL EXCEPT` позволяют исключить пользователей из списка всех пользователей. Например, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`

+!!! note "Note"
+    Если для таблицы не задано ни одной политики доступа к строкам, то любой пользователь может выполнить команду SELECT и получить все строки таблицы. Если определить хотя бы одну политику для таблицы, до доступ к строкам будет управляться этими политиками, причем для всех пользователей (даже для тех, для кого политики не определялись). Например, следующая политика
+
+    `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter`
+
+    запретит пользователям `mira` и `peter` видеть строки с `b != 1`, и еще запретит всем остальным пользователям (например, пользователю `paul`) видеть какие-либо строки вообще из таблицы `mydb.table1`.
+    
+    Если это нежелательно, такое поведение можно исправить, определив дополнительную политику:
+
+    `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter`
+
+## Секция AS {#create-row-policy-as}
+
+Может быть одновременно активно более одной политики для одной и той же таблицы и одного и того же пользователя. Поэтому нам нужен способ комбинировать политики.
+
+По умолчанию политики комбинируются с использованием логического оператора `OR`. Например, политики:
+
+``` sql
+CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
+CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 TO peter, antonio
+```
+
+разрешат пользователю с именем `peter` видеть строки, для которых будет верно `b=1` или `c=2`.
+
+Секция `AS` указывает, как политики должны комбинироваться с другими политиками. Политики могут быть или разрешительными (`PERMISSIVE`), или ограничительными (`RESTRICTIVE`). По умолчанию политики создаются разрешительными (`PERMISSIVE`); такие политики комбинируются с использованием логического оператора `OR`.
+
+Ограничительные (`RESTRICTIVE`) политики комбинируются с использованием логического оператора `AND`.
+
+Общая формула выглядит так:
+
+```
+строка_видима = (одна или больше permissive-политик дала ненулевой результат проверки условия) И
+                (все restrictive-политики дали ненулевой результат проверки условия)
+```
+
+Например, политики
+
+``` sql
+CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
+CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
+```
+
+разрешат пользователю с именем `peter` видеть только те строки, для которых будет одновременно `b=1` и `c=2`.
+
+## Секция ON CLUSTER {#create-row-policy-on-cluster}
+
+Секция `ON CLUSTER` позволяет создавать политики на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
+
 ## Примеры

-`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost`
+`CREATE ROW POLICY filter1 ON mydb.mytable USING a<1000 TO accountant, john@localhost`

-`CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira`
+`CREATE ROW POLICY filter2 ON mydb.mytable USING a<1000 AND b=5 TO ALL EXCEPT mira`
+
+`CREATE ROW POLICY filter3 ON mydb.mytable USING 1 TO admin`

 <!--hide-->
--- a/docs/ru/sql-reference/statements/create/table.md
+++ b/docs/ru/sql-reference/statements/create/table.md
@ -46,15 +46,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
 ### Из запроса SELECT {#from-select-query}

 ``` sql
-CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
+CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
 ```

-Создаёт таблицу со структурой, как результат запроса `SELECT`, с движком engine, и заполняет её данными из SELECT-а.
+Создаёт таблицу со структурой, как результат запроса `SELECT`, с движком `engine`, и заполняет её данными из `SELECT`. Также вы можете явно задать описание столбцов.

-Во всех случаях, если указано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если таблица уже существует. В этом случае, запрос будет ничего не делать.
+Если таблица уже существует и указано `IF NOT EXISTS`, то запрос ничего не делает.

 После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../../engines/table-engines/index.md#table_engines).

+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
+SELECT x, toTypeName(x) FROM t1;
+```
+
+Результат:
+
+```text
+┌─x─┬─toTypeName(x)─┐
+│ 1 │ String        │
+└───┴───────────────┘
+```
+
 ## Модификатор NULL или NOT NULL {#null-modifiers}

 Модификатор `NULL` или `NOT NULL`, указанный после типа данных в определении столбца, позволяет или не позволяет типу данных быть [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). 
@ -230,7 +247,7 @@ CREATE TABLE codec_example
 )
 ENGINE = MergeTree()
 ```
-## Временные таблицы {#vremennye-tablitsy}
+## Временные таблицы {#temporary-tables}

 ClickHouse поддерживает временные таблицы со следующими характеристиками:

--- a/docs/ru/sql-reference/statements/grant.md
+++ b/docs/ru/sql-reference/statements/grant.md
@ -93,7 +93,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
            - `ALTER ADD CONSTRAINT`
            - `ALTER DROP CONSTRAINT`
        - `ALTER TTL`
-        - `ALTER MATERIALIZE TTL`
+            - `ALTER MATERIALIZE TTL`
        - `ALTER SETTINGS`
        - `ALTER MOVE PARTITION`
        - `ALTER FETCH PARTITION`
@ -104,9 +104,9 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
 - [CREATE](#grant-create)
    - `CREATE DATABASE`
    - `CREATE TABLE`
+        - `CREATE TEMPORARY TABLE`
    - `CREATE VIEW`
    - `CREATE DICTIONARY`
-    - `CREATE TEMPORARY TABLE`
 - [DROP](#grant-drop)
    - `DROP DATABASE`
    - `DROP TABLE`
@ -152,7 +152,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
    - `SYSTEM RELOAD`
        - `SYSTEM RELOAD CONFIG`
        - `SYSTEM RELOAD DICTIONARY`
-        - `SYSTEM RELOAD EMBEDDED DICTIONARIES`
+            - `SYSTEM RELOAD EMBEDDED DICTIONARIES`
    - `SYSTEM MERGES`
    - `SYSTEM TTL MERGES`
    - `SYSTEM FETCHES`
@ -279,7 +279,7 @@ GRANT INSERT(x,y) ON db.table TO john
            - `ALTER ADD CONSTRAINT`. Уровень: `TABLE`. Алиасы: `ADD CONSTRAINT`
            - `ALTER DROP CONSTRAINT`. Уровень: `TABLE`. Алиасы: `DROP CONSTRAINT`
        - `ALTER TTL`. Уровень: `TABLE`. Алиасы: `ALTER MODIFY TTL`, `MODIFY TTL`
-        - `ALTER MATERIALIZE TTL`. Уровень: `TABLE`. Алиасы: `MATERIALIZE TTL`
+            - `ALTER MATERIALIZE TTL`. Уровень: `TABLE`. Алиасы: `MATERIALIZE TTL`
        - `ALTER SETTINGS`. Уровень: `TABLE`. Алиасы: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING`
        - `ALTER MOVE PARTITION`. Уровень: `TABLE`. Алиасы: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART`
        - `ALTER FETCH PARTITION`. Уровень: `TABLE`. Алиасы: `FETCH PARTITION`
@ -307,9 +307,9 @@ GRANT INSERT(x,y) ON db.table TO john
 - `CREATE`. Уровень: `GROUP`
    - `CREATE DATABASE`. Уровень: `DATABASE`
    - `CREATE TABLE`. Уровень: `TABLE`
+        - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`
    - `CREATE VIEW`. Уровень: `VIEW`
    - `CREATE DICTIONARY`. Уровень: `DICTIONARY`
-    - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`

 **Дополнительно**

@ -407,7 +407,7 @@ GRANT INSERT(x,y) ON db.table TO john
    - `SYSTEM RELOAD`. Уровень: `GROUP`
        - `SYSTEM RELOAD CONFIG`. Уровень: `GLOBAL`. Алиасы: `RELOAD CONFIG`
        - `SYSTEM RELOAD DICTIONARY`. Уровень: `GLOBAL`. Алиасы: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES`
-        - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Уровень: `GLOBAL`. Алиасы: `RELOAD EMBEDDED DICTIONARIES`
+            - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Уровень: `GLOBAL`. Алиасы: `RELOAD EMBEDDED DICTIONARIES`
    - `SYSTEM MERGES`. Уровень: `TABLE`. Алиасы: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES`
    - `SYSTEM TTL MERGES`. Уровень: `TABLE`. Алиасы: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES`
    - `SYSTEM FETCHES`. Уровень: `TABLE`. Алиасы: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES`
--- a/docs/ru/sql-reference/statements/optimize.md
+++ b/docs/ru/sql-reference/statements/optimize.md
@ -5,19 +5,83 @@ toc_title: OPTIMIZE

 # OPTIMIZE {#misc_operations-optimize}

-``` sql
-OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE]
-```
-
-Запрос пытается запустить внеплановый мёрж кусков данных для таблиц семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). Другие движки таблиц не поддерживаются.
-
-Если `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md), ClickHouse создаёт задачу на мёрж и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`).
-
-   Если `OPTIMIZE` не выполняет мёрж по любой причине, ClickHouse не оповещает об этом клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
-   Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr).
-   Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
-   Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех колонках), имеет смысл только для движка MergeTree.
+Запрос пытается запустить внеплановое слияние кусков данных для таблиц.

 !!! warning "Внимание"
-    Запрос `OPTIMIZE` не может устранить причину появления ошибки «Too many parts».
+    `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`.

+**Синтаксис**
+
+``` sql
+OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+```
+
+Может применяться к таблицам семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md), [MaterializedView](../../engines/table-engines/special/materializedview.md) и [Buffer](../../engines/table-engines/special/buffer.md). Другие движки таблиц не поддерживаются.
+
+Если запрос `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md), ClickHouse создаёт задачу на слияние и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`).
+
+-   По умолчанию, если запросу `OPTIMIZE` не удалось выполнить слияние, то
+ClickHouse не оповещает клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
+-   Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr).
+-   Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
+-   Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех столбцах), имеет смысл только для движка MergeTree.
+
+## Выражение BY {#by-expression}
+
+Чтобы выполнить дедупликацию по произвольному набору столбцов, вы можете явно указать список столбцов или использовать любую комбинацию подстановки [`*`](../../sql-reference/statements/select/index.md#asterisk), выражений [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) и [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier).
+
+ Список столбцов для дедупликации должен включать все столбцы, указанные в условиях сортировки (первичный ключ и ключ сортировки), а также в условиях партиционирования (ключ партиционирования).
+
+ !!! note "Примечание"
+    Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы `MATERIALIZED` и `ALIAS` не включаются в результат.
+    Если указать пустой список или выражение, которое возвращает пустой список, или дедуплицировать столбец по псевдониму (`ALIAS`), то сервер вернет ошибку.
+
+
+**Примеры**
+
+Рассмотрим таблицу:
+
+``` sql
+CREATE TABLE example (
+    primary_key Int32,
+    secondary_key Int32,
+    value UInt32,
+    partition_key UInt32,
+    materialized_value UInt32 MATERIALIZED 12345,
+    aliased_value UInt32 ALIAS 2,
+    PRIMARY KEY primary_key
+) ENGINE=MergeTree
+PARTITION BY partition_key;
+```
+
+Прежний способ дедупликации, когда учитываются все столбцы. Строка удаляется только в том случае, если все значения во всех столбцах равны соответствующим значениям в предыдущей строке.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE;
+```
+
+Дедупликация по всем столбцам, кроме `ALIAS` и `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key` и `materialized_value`.
+
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
+```
+
+Дедупликация по всем столбцам, кроме `ALIAS`, `MATERIALIZED` и `materialized_value`: столбцы `primary_key`, `secondary_key`, `value` и `partition_key`.
+
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
+```
+
+Дедупликация по столбцам `primary_key`, `secondary_key` и `partition_key`.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
+```
+
+Дедупликация по любому столбцу, соответствующему регулярному выражению: столбцам `primary_key`, `secondary_key` и `partition_key`.
+
+``` sql
+OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
+```
--- a/programs/client/ConnectionParameters.cpp
+++ b/programs/client/ConnectionParameters.cpp
@ -7,6 +7,8 @@
 #include <IO/ConnectionTimeouts.h>
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Common/Exception.h>
+#include <Common/isLocalAddress.h>
+#include <Common/DNSResolver.h>
 #include <common/setTerminalEcho.h>
 #include <ext/scope_guard.h>

@ -60,7 +62,9 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
 #endif
    }

-    compression = config.getBool("compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable;
+    /// By default compression is disabled if address looks like localhost.
+    compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host)))
+        ? Protocol::Compression::Enable : Protocol::Compression::Disable;

    timeouts = ConnectionTimeouts(
        Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
--- a/programs/odbc-bridge/MainHandler.cpp
+++ b/programs/odbc-bridge/MainHandler.cpp
@ -54,9 +54,10 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
 void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
    HTMLForm params(request);
+    LOG_TRACE(log, "Request URI: {}", request.getURI());
+
    if (mode == "read")
        params.read(request.getStream());
-    LOG_TRACE(log, "Request URI: {}", request.getURI());

    if (mode == "read" && !params.has("query"))
    {
@ -64,11 +65,6 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
        return;
    }

-    if (!params.has("columns"))
-    {
-        processError(response, "No 'columns' in request URL");
-        return;
-    }

    if (!params.has("connection_string"))
    {
@ -76,6 +72,16 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
        return;
    }

+    if (!params.has("sample_block"))
+    {
+        processError(response, "No 'sample_block' in request URL");
+        return;
+    }
+
+    std::string format = params.get("format", "RowBinary");
+    std::string connection_string = params.get("connection_string");
+    LOG_TRACE(log, "Connection string: '{}'", connection_string);
+
    UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
    if (params.has("max_block_size"))
    {
@ -88,24 +94,19 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
        max_block_size = parse<size_t>(max_block_size_str);
    }

-    std::string columns = params.get("columns");
+    std::string sample_block_string = params.get("sample_block");
    std::unique_ptr<Block> sample_block;
    try
    {
-        sample_block = parseColumns(std::move(columns));
+        sample_block = parseColumns(std::move(sample_block_string));
    }
    catch (const Exception & ex)
    {
-        processError(response, "Invalid 'columns' parameter in request body '" + ex.message() + "'");
-        LOG_WARNING(log, ex.getStackTraceString());
+        processError(response, "Invalid 'sample_block' parameter in request body '" + ex.message() + "'");
+        LOG_ERROR(log, ex.getStackTraceString());
        return;
    }

-    std::string format = params.get("format", "RowBinary");
-
-    std::string connection_string = params.get("connection_string");
-    LOG_TRACE(log, "Connection string: '{}'", connection_string);
-
    WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);

    try
--- a/programs/odbc-bridge/getIdentifierQuote.cpp
+++ b/programs/odbc-bridge/getIdentifierQuote.cpp
@ -19,7 +19,18 @@ namespace ErrorCodes

 std::string getIdentifierQuote(nanodbc::connection & connection)
 {
-    return connection.get_info<std::string>(SQL_IDENTIFIER_QUOTE_CHAR);
+    std::string quote;
+    try
+    {
+        quote = connection.get_info<std::string>(SQL_IDENTIFIER_QUOTE_CHAR);
+    }
+    catch (...)
+    {
+        LOG_WARNING(&Poco::Logger::get("ODBCGetIdentifierQuote"), "Cannot fetch identifier quote. Default double quote is used. Reason: {}", getCurrentExceptionMessage(false));
+        return "\"";
+    }
+
+    return quote;
 }


--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@ -19,6 +19,7 @@ set (CLICKHOUSE_SERVER_LINK
        clickhouse_storages_system
        clickhouse_table_functions
        string_utils
+        jemalloc

    ${LINK_RESOURCE_LIB}

--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -101,6 +101,10 @@
 #   include <Server/KeeperTCPHandlerFactory.h>
 #endif

+#if USE_JEMALLOC
+#    include <jemalloc/jemalloc.h>
+#endif
+
 namespace CurrentMetrics
 {
    extern const Metric Revision;
@ -109,11 +113,35 @@ namespace CurrentMetrics
    extern const Metric MaxDDLEntryID;
 }

+#if USE_JEMALLOC
+static bool jemallocOptionEnabled(const char *name)
+{
+    bool value;
+    size_t size = sizeof(value);
+
+    if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
+        throw Poco::SystemException("mallctl() failed");
+
+    return value;
+}
+#else
+static bool jemallocOptionEnabled(const char *) { return 0; }
+#endif
+

 int mainEntryClickHouseServer(int argc, char ** argv)
 {
    DB::Server app;

+    if (jemallocOptionEnabled("opt.background_thread"))
+    {
+        LOG_ERROR(&app.logger(),
+            "jemalloc.background_thread was requested, "
+            "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
+            "and also background_thread is not compatible with ClickHouse watchdog "
+            "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
+    }
+
    /// Do not fork separate process from watchdog if we attached to terminal.
    /// Otherwise it breaks gdb usage.
    /// Can be overridden by environment variable (cannot use server config at this moment).
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -89,7 +89,7 @@
    <!-- Compatibility with PostgreSQL protocol.
         ClickHouse will pretend to be PostgreSQL for applications connecting to this port.
    -->
-    <!-- <postgresql_port>9005</postgresql_port> -->
+    <postgresql_port>9005</postgresql_port>

    <!-- HTTP API with TLS (HTTPS).
         You have to configure certificate to enable this interface.
--- a/src/Access/AccessType.h
+++ b/src/Access/AccessType.h
@ -130,6 +130,7 @@ enum class AccessType
    M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \
+    M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\
    M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \
    M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP_MERGES, START MERGES", TABLE, SYSTEM) \
--- a/src/AggregateFunctions/AggregateFunctionAvg.h
+++ b/src/AggregateFunctions/AggregateFunctionAvg.h
@ -96,7 +96,7 @@ public:
        UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
        : Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}

-    DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
+    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<Float64>>(); }

    bool allocatesMemoryInArena() const override { return false; }

--- a/src/AggregateFunctions/AggregateFunctionSumCount.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSumCount.cpp
@ -0,0 +1,49 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionSumCount.h>
+#include <AggregateFunctions/Helpers.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include "registerAggregateFunctions.h"
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+bool allowType(const DataTypePtr& type) noexcept
+{
+    const WhichDataType t(type);
+    return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
+}
+
+AggregateFunctionPtr createAggregateFunctionSumCount(const std::string & name, const DataTypes & argument_types, const Array & parameters)
+{
+    assertNoParameters(name, parameters);
+    assertUnary(name, argument_types);
+
+    AggregateFunctionPtr res;
+    DataTypePtr data_type = argument_types[0];
+    if (!allowType(data_type))
+        throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    if (isDecimal(data_type))
+        res.reset(createWithDecimalType<AggregateFunctionSumCount>(
+            *data_type, argument_types, getDecimalScale(*data_type)));
+    else
+        res.reset(createWithNumericType<AggregateFunctionSumCount>(*data_type, argument_types));
+
+    return res;
+}
+
+}
+
+void registerAggregateFunctionSumCount(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("sumCount", createAggregateFunctionSumCount);
+}
+
+}
--- a/src/AggregateFunctions/AggregateFunctionSumCount.h
+++ b/src/AggregateFunctions/AggregateFunctionSumCount.h
@ -0,0 +1,55 @@
+#pragma once
+
+#include <type_traits>
+#include <DataTypes/DataTypeTuple.h>
+#include <AggregateFunctions/AggregateFunctionAvg.h>
+
+
+namespace DB
+{
+template <typename T>
+using DecimalOrNumberDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<AvgFieldType<T>>, DataTypeNumber<AvgFieldType<T>>>;
+template <typename T>
+class AggregateFunctionSumCount final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>
+{
+public:
+    using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>;
+
+    AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0)
+         : Base(argument_types_, num_scale_), scale(num_scale_) {}
+
+    DataTypePtr getReturnType() const override
+    {
+        DataTypes types;
+        if constexpr (IsDecimalNumber<T>)
+            types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>(DecimalOrNumberDataType<T>::maxPrecision(), scale));
+        else
+            types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>());
+
+        types.emplace_back(std::make_shared<DataTypeUInt64>());
+
+        return std::make_shared<DataTypeTuple>(types);
+    }
+
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final
+    {
+        assert_cast<DecimalOrVectorCol<AvgFieldType<T>> &>((assert_cast<ColumnTuple &>(to)).getColumn(0)).getData().push_back(
+            this->data(place).numerator);
+
+        assert_cast<ColumnUInt64 &>((assert_cast<ColumnTuple &>(to)).getColumn(1)).getData().push_back(
+            this->data(place).denominator);
+    }
+
+    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
+    {
+        this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
+        ++this->data(place).denominator;
+    }
+
+    String getName() const final { return "sumCount"; }
+
+private:
+    UInt32 scale;
+};
+
+}
--- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
+++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
@ -17,7 +17,7 @@
 #include <IO/WriteHelpers.h>


-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
@ -280,7 +280,7 @@ public:

 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h
+++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h
@ -163,7 +163,7 @@ public:
        sorted = false;
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wclass-memaccess"
 #endif
@ -191,7 +191,7 @@ public:
        }
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@ -25,6 +25,7 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory &);
 void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
 void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
 void registerAggregateFunctionSum(AggregateFunctionFactory &);
+void registerAggregateFunctionSumCount(AggregateFunctionFactory &);
 void registerAggregateFunctionSumMap(AggregateFunctionFactory &);
 void registerAggregateFunctionsUniq(AggregateFunctionFactory &);
 void registerAggregateFunctionUniqCombined(AggregateFunctionFactory &);
@ -83,6 +84,7 @@ void registerAggregateFunctions()
        registerAggregateFunctionsStatisticsStable(factory);
        registerAggregateFunctionsStatisticsSimple(factory);
        registerAggregateFunctionSum(factory);
+        registerAggregateFunctionSumCount(factory);
        registerAggregateFunctionSumMap(factory);
        registerAggregateFunctionsUniq(factory);
        registerAggregateFunctionUniqCombined(factory);
--- a/src/AggregateFunctions/ya.make
+++ b/src/AggregateFunctions/ya.make
@ -50,6 +50,7 @@ SRCS(
    AggregateFunctionStatisticsSimple.cpp
    AggregateFunctionStudentTTest.cpp
    AggregateFunctionSum.cpp
+    AggregateFunctionSumCount.cpp
    AggregateFunctionSumMap.cpp
    AggregateFunctionTopK.cpp
    AggregateFunctionUniq.cpp
--- a/src/Bridge/XDBCBridgeHelper.h
+++ b/src/Bridge/XDBCBridgeHelper.h
@ -37,7 +37,7 @@ class IXDBCBridgeHelper : public IBridgeHelper
 public:
    explicit IXDBCBridgeHelper(ContextPtr context_) : IBridgeHelper(context_) {}

-    virtual std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, UInt64 max_block_size) const = 0;
+    virtual std::vector<std::pair<std::string, std::string>> getURLParams(UInt64 max_block_size) const = 0;

    virtual Poco::URI getColumnsInfoURI() const = 0;

@ -138,12 +138,11 @@ protected:
        return uri;
    }

-    URLParams getURLParams(const std::string & cols, UInt64 max_block_size) const override
+    URLParams getURLParams(UInt64 max_block_size) const override
    {
        std::vector<std::pair<std::string, std::string>> result;

        result.emplace_back("connection_string", connection_string); /// already validated
-        result.emplace_back("columns", cols);
        result.emplace_back("max_block_size", std::to_string(max_block_size));

        return result;
--- a/src/Client/Connection.h
+++ b/src/Client/Connection.h
@ -139,6 +139,8 @@ public:
    UInt16 getPort() const;
    const String & getDefaultDatabase() const;

+    Protocol::Compression getCompression() const { return compression; }
+
    /// If last flag is true, you need to call sendExternalTablesData after.
    void sendQuery(
        const ConnectionTimeouts & timeouts,
--- a/src/Columns/ColumnLowCardinality.cpp
+++ b/src/Columns/ColumnLowCardinality.cpp
@ -122,7 +122,7 @@ namespace
        else if (auto * data_uint64 = getIndexesData<UInt64>(column))
            return mapUniqueIndexImpl(*data_uint64);
        else
-            throw Exception("Indexes column for getUniqueIndex must be ColumnUInt, got" + column.getName(),
+            throw Exception("Indexes column for getUniqueIndex must be ColumnUInt, got " + column.getName(),
                            ErrorCodes::LOGICAL_ERROR);
    }
 }
@ -151,7 +151,7 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
    const auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);

    if (!low_cardinality_src)
-        throw Exception("Expected ColumnLowCardinality, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
+        throw Exception("Expected ColumnLowCardinality, got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN);

    size_t position = low_cardinality_src->getIndexes().getUInt(n);

--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@ -111,7 +111,7 @@ public:
    }

 /// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -128,7 +128,7 @@ public:
        offsets.push_back(new_size);
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Columns/ColumnsCommon.h
+++ b/src/Columns/ColumnsCommon.h
@ -66,7 +66,7 @@ ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t
    else if (auto * data_uint64 = detail::getIndexesData<UInt64>(indexes))
        return column.template indexImpl<UInt64>(*data_uint64, limit);
    else
-        throw Exception("Indexes column for IColumn::select must be ColumnUInt, got" + indexes.getName(),
+        throw Exception("Indexes column for IColumn::select must be ColumnUInt, got " + indexes.getName(),
                        ErrorCodes::LOGICAL_ERROR);
 }

--- a/src/Columns/tests/gtest_weak_hash_32.cpp
+++ b/src/Columns/tests/gtest_weak_hash_32.cpp
@ -88,7 +88,6 @@ void checkColumn(
                if (num_collisions <= max_collisions_to_print)
                {
                    collisions_str << "Collision:\n";
-                    collisions_str << print_for_row(it->second) << '\n';
                    collisions_str << print_for_row(i) << std::endl;
                }

--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@ -277,7 +277,7 @@ private:
  *  GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfree-nonheap-object"
 #endif
@ -359,6 +359,6 @@ extern template class Allocator<true, false>;
 extern template class Allocator<false, true>;
 extern template class Allocator<true, true>;

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -146,6 +146,9 @@
    M(StorageBufferPassedTimeMaxThreshold, "") \
    M(StorageBufferPassedRowsMaxThreshold, "") \
    M(StorageBufferPassedBytesMaxThreshold, "") \
+    M(StorageBufferPassedTimeFlushThreshold, "") \
+    M(StorageBufferPassedRowsFlushThreshold, "") \
+    M(StorageBufferPassedBytesFlushThreshold, "") \
    M(StorageBufferLayerLockReadersWaitMilliseconds, "Time for waiting for Buffer layer during reading") \
    M(StorageBufferLayerLockWritersWaitMilliseconds, "Time for waiting free Buffer layer to write to (can be used to tune Buffer layers)") \
    \
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@ -184,6 +184,10 @@ static void * getCallerAddress(const ucontext_t & context)
 #    else
    return reinterpret_cast<void *>(context.uc_mcontext.gregs[REG_RIP]);
 #    endif
+
+#elif defined(__APPLE__) && defined(__aarch64__)
+    return reinterpret_cast<void *>(context.uc_mcontext->__ss.__pc);
+
 #elif defined(__aarch64__)
    return reinterpret_cast<void *>(context.uc_mcontext.pc);
 #elif defined(__powerpc64__)
--- a/src/Common/UInt128.h
+++ b/src/Common/UInt128.h
@ -19,7 +19,7 @@ namespace DB
 struct UInt128
 {
 /// Suppress gcc7 warnings: 'prev_key.DB::UInt128::low' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -92,7 +92,7 @@ struct UInt128
            return static_cast<T>(low);
    }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

@ -150,7 +150,7 @@ struct DummyUInt256
 {

 /// Suppress gcc7 warnings: 'prev_key.DB::UInt256::a' may be used uninitialized in this function
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -179,7 +179,7 @@ struct DummyUInt256
    bool operator== (const UInt64 rhs) const { return a == rhs && b == 0 && c == 0 && d == 0; }
    bool operator!= (const UInt64 rhs) const { return !operator==(rhs); }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Common/ZooKeeper/IKeeper.h
+++ b/src/Common/ZooKeeper/IKeeper.h
@ -116,6 +116,7 @@ struct Request
    virtual ~Request() = default;
    virtual String getPath() const = 0;
    virtual void addRootPath(const String & /* root_path */) {}
+    virtual size_t bytesSize() const { return 0; }
 };

 struct Response;
@ -131,6 +132,7 @@ struct Response
    Response & operator=(const Response &) = default;
    virtual ~Response() = default;
    virtual void removeRootPath(const String & /* root_path */) {}
+    virtual size_t bytesSize() const { return 0; }
 };

 struct WatchResponse : virtual Response
@ -140,6 +142,8 @@ struct WatchResponse : virtual Response
    String path;

    void removeRootPath(const String & root_path) override;
+
+    size_t bytesSize() const override { return path.size() + sizeof(type) + sizeof(state); }
 };

 using WatchCallback = std::function<void(const WatchResponse &)>;
@ -154,6 +158,9 @@ struct CreateRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + data.size()
+            + sizeof(is_ephemeral) + sizeof(is_sequential) + acls.size() * sizeof(ACL); }
 };

 struct CreateResponse : virtual Response
@ -161,6 +168,8 @@ struct CreateResponse : virtual Response
    String path_created;

    void removeRootPath(const String & root_path) override;
+
+    size_t bytesSize() const override { return path_created.size(); }
 };

 struct RemoveRequest : virtual Request
@ -170,6 +179,8 @@ struct RemoveRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + sizeof(version); }
 };

 struct RemoveResponse : virtual Response
@ -182,11 +193,15 @@ struct ExistsRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ExistsResponse : virtual Response
 {
    Stat stat;
+
+    size_t bytesSize() const override { return sizeof(Stat); }
 };

 struct GetRequest : virtual Request
@ -195,12 +210,16 @@ struct GetRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct GetResponse : virtual Response
 {
    String data;
    Stat stat;
+
+    size_t bytesSize() const override { return data.size() + sizeof(stat); }
 };

 struct SetRequest : virtual Request
@ -211,11 +230,15 @@ struct SetRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return data.size() + data.size() + sizeof(version); }
 };

 struct SetResponse : virtual Response
 {
    Stat stat;
+
+    size_t bytesSize() const override { return sizeof(stat); }
 };

 struct ListRequest : virtual Request
@ -224,12 +247,22 @@ struct ListRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ListResponse : virtual Response
 {
    std::vector<String> names;
    Stat stat;
+
+    size_t bytesSize() const override
+    {
+        size_t size = sizeof(stat);
+        for (const auto & name : names)
+            size += name.size();
+        return size;
+    }
 };

 struct CheckRequest : virtual Request
@ -239,6 +272,8 @@ struct CheckRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return path; }
+
+    size_t bytesSize() const override { return path.size() + sizeof(version); }
 };

 struct CheckResponse : virtual Response
@ -251,6 +286,14 @@ struct MultiRequest : virtual Request

    void addRootPath(const String & root_path) override;
    String getPath() const override { return {}; }
+
+    size_t bytesSize() const override
+    {
+        size_t size = 0;
+        for (const auto & request : requests)
+            size += request->bytesSize();
+        return size;
+    }
 };

 struct MultiResponse : virtual Response
@ -258,6 +301,14 @@ struct MultiResponse : virtual Response
    Responses responses;

    void removeRootPath(const String & root_path) override;
+
+    size_t bytesSize() const override
+    {
+        size_t size = 0;
+        for (const auto & response : responses)
+            size += response->bytesSize();
+        return size;
+    }
 };

 /// This response may be received only as an element of responses in MultiResponse.
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@ -455,6 +455,39 @@ ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return std::m
 ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return std::make_shared<ZooKeeperMultiResponse>(requests); }
 ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }

+void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(internal_id, out);
+    Coordination::write(session_timeout_ms, out);
+    Coordination::write(server_id, out);
+}
+
+void ZooKeeperSessionIDRequest::readImpl(ReadBuffer & in)
+{
+    Coordination::read(internal_id, in);
+    Coordination::read(session_timeout_ms, in);
+    Coordination::read(server_id, in);
+}
+
+Coordination::ZooKeeperResponsePtr ZooKeeperSessionIDRequest::makeResponse() const
+{
+    return std::make_shared<ZooKeeperSessionIDResponse>();
+}
+
+void ZooKeeperSessionIDResponse::readImpl(ReadBuffer & in)
+{
+    Coordination::read(internal_id, in);
+    Coordination::read(session_id, in);
+    Coordination::read(server_id, in);
+}
+
+void ZooKeeperSessionIDResponse::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(internal_id, out);
+    Coordination::write(session_id, out);
+    Coordination::write(server_id, out);
+}
+
 void ZooKeeperRequestFactory::registerRequest(OpNum op_num, Creator creator)
 {
    if (!op_num_to_request.try_emplace(op_num, creator).second)
@ -511,6 +544,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
    registerZooKeeperRequest<OpNum::List, ZooKeeperListRequest>(*this);
    registerZooKeeperRequest<OpNum::Check, ZooKeeperCheckRequest>(*this);
    registerZooKeeperRequest<OpNum::Multi, ZooKeeperMultiRequest>(*this);
+    registerZooKeeperRequest<OpNum::SessionID, ZooKeeperSessionIDRequest>(*this);
 }

 }
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@ -84,6 +84,8 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return ZooKeeperRequest::bytesSize() + path.size(); }
 };

 struct ZooKeeperSyncResponse final : ZooKeeperResponse
@ -92,6 +94,8 @@ struct ZooKeeperSyncResponse final : ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Sync; }
+
+    size_t bytesSize() const override { return path.size(); }
 };

 struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
@ -128,6 +132,9 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return ZooKeeperRequest::bytesSize() + sizeof(xid) +
+            sizeof(type) + scheme.size() + data.size(); }
 };

 struct ZooKeeperAuthResponse final : ZooKeeperResponse
@ -136,6 +143,8 @@ struct ZooKeeperAuthResponse final : ZooKeeperResponse
    void writeImpl(WriteBuffer &) const override {}

    OpNum getOpNum() const override { return OpNum::Auth; }
+
+    size_t bytesSize() const override { return ZooKeeperResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperCloseRequest final : ZooKeeperRequest
@ -172,6 +181,8 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return CreateRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
@ -181,6 +192,8 @@ struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
    void writeImpl(WriteBuffer & out) const override;

    OpNum getOpNum() const override { return OpNum::Create; }
+
+    size_t bytesSize() const override { return CreateResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
@ -194,6 +207,8 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return RemoveRequest::bytesSize() + sizeof(xid); }
 };

 struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
@ -201,6 +216,8 @@ struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
    void readImpl(ReadBuffer &) override {}
    void writeImpl(WriteBuffer &) const override {}
    OpNum getOpNum() const override { return OpNum::Remove; }
+
+    size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
@ -211,6 +228,8 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return ExistsRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
@ -218,6 +237,8 @@ struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Exists; }
+
+    size_t bytesSize() const override { return ExistsResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
@ -228,6 +249,8 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return GetRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
@ -235,6 +258,8 @@ struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Get; }
+
+    size_t bytesSize() const override { return GetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
@ -247,6 +272,8 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return SetRequest::bytesSize() + sizeof(xid); }
 };

 struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
@ -254,6 +281,8 @@ struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::Set; }
+
+    size_t bytesSize() const override { return SetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
@ -263,6 +292,8 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
    void readImpl(ReadBuffer & in) override;
    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return ListRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperSimpleListRequest final : ZooKeeperListRequest
@ -275,6 +306,8 @@ struct ZooKeeperListResponse : ListResponse, ZooKeeperResponse
    void readImpl(ReadBuffer & in) override;
    void writeImpl(WriteBuffer & out) const override;
    OpNum getOpNum() const override { return OpNum::List; }
+
+    size_t bytesSize() const override { return ListResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperSimpleListResponse final : ZooKeeperListResponse
@ -293,6 +326,8 @@ struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override { return !has_watch; }
+
+    size_t bytesSize() const override { return CheckRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
@ -300,6 +335,8 @@ struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
    void readImpl(ReadBuffer &) override {}
    void writeImpl(WriteBuffer &) const override {}
    OpNum getOpNum() const override { return OpNum::Check; }
+
+    size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 /// This response may be received only as an element of responses in MultiResponse.
@ -309,6 +346,8 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
    void writeImpl(WriteBuffer & out) const override;

    OpNum getOpNum() const override { return OpNum::Error; }
+
+    size_t bytesSize() const override { return ErrorResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };

 struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
@ -323,6 +362,8 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest

    ZooKeeperResponsePtr makeResponse() const override;
    bool isReadRequest() const override;
+
+    size_t bytesSize() const override { return MultiRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
 };

 struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
@ -346,6 +387,41 @@ struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse

    void writeImpl(WriteBuffer & out) const override;

+    size_t bytesSize() const override { return MultiResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
+};
+
+/// Fake internal coordination (keeper) response. Never received from client
+/// and never send to client.
+struct ZooKeeperSessionIDRequest final : ZooKeeperRequest
+{
+    int64_t internal_id;
+    int64_t session_timeout_ms;
+    /// Who requested this session
+    int32_t server_id;
+
+    Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
+    String getPath() const override { return {}; }
+    void writeImpl(WriteBuffer & out) const override;
+    void readImpl(ReadBuffer & in) override;
+
+    Coordination::ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
+};
+
+/// Fake internal coordination (keeper) response. Never received from client
+/// and never send to client.
+struct ZooKeeperSessionIDResponse final : ZooKeeperResponse
+{
+    int64_t internal_id;
+    int64_t session_id;
+    /// Who requested this session
+    int32_t server_id;
+
+    void readImpl(ReadBuffer & in) override;
+
+    void writeImpl(WriteBuffer & out) const override;
+
+    Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
 };

 class ZooKeeperRequestFactory final : private boost::noncopyable
--- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
@ -21,6 +21,7 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
    static_cast<int32_t>(OpNum::Check),
    static_cast<int32_t>(OpNum::Multi),
    static_cast<int32_t>(OpNum::Auth),
+    static_cast<int32_t>(OpNum::SessionID),
 };

 std::string toString(OpNum op_num)
@ -55,6 +56,8 @@ std::string toString(OpNum op_num)
            return "Heartbeat";
        case OpNum::Auth:
            return "Auth";
+        case OpNum::SessionID:
+            return "SessionID";
    }
    int32_t raw_op = static_cast<int32_t>(op_num);
    throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED);
--- a/src/Common/ZooKeeper/ZooKeeperConstants.h
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.h
@ -30,6 +30,7 @@ enum class OpNum : int32_t
    Check = 13,
    Multi = 14,
    Auth = 100,
+    SessionID = 997, /// Special internal request
 };

 std::string toString(OpNum op_num);
--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@ -1012,6 +1012,16 @@ void ZooKeeper::pushRequest(RequestInfo && info)
    ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions);
 }

+void ZooKeeper::executeGenericRequest(
+    const ZooKeeperRequestPtr & request,
+    ResponseCallback callback)
+{
+    RequestInfo request_info;
+    request_info.request = request;
+    request_info.callback = callback;
+
+    pushRequest(std::move(request_info));
+}

 void ZooKeeper::create(
    const String & path,
--- a/src/Common/ZooKeeper/ZooKeeperImpl.h
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.h
@ -121,6 +121,9 @@ public:
    /// Useful to check owner of ephemeral node.
    int64_t getSessionID() const override { return session_id; }

+    void executeGenericRequest(
+        const ZooKeeperRequestPtr & request,
+        ResponseCallback callback);

    /// See the documentation about semantics of these methods in IKeeper class.

--- a/src/Common/tests/compact_array.cpp
+++ b/src/Common/tests/compact_array.cpp
@ -1,5 +1,5 @@
 /// Bug in GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
@ -263,6 +263,6 @@ int main()
    return 0;
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
--- a/src/Common/tests/parallel_aggregation.cpp
+++ b/src/Common/tests/parallel_aggregation.cpp
@ -69,7 +69,7 @@ static void aggregate1(Map & map, Source::const_iterator begin, Source::const_it
        ++map[*it];
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -122,7 +122,7 @@ static void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source:
    }
 }

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Common/tests/parallel_aggregation2.cpp
+++ b/src/Common/tests/parallel_aggregation2.cpp
@ -62,7 +62,7 @@ struct AggregateIndependent
    }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -115,7 +115,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
    }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

@ -265,7 +265,7 @@ struct Creator
    void operator()(Value &) const {}
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
@ -275,7 +275,7 @@ struct Updater
    void operator()(Value & x) const { ++x; }
 };

-#if !__clang__
+#if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif

--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@ -80,7 +80,7 @@ public:
    {}


-    off_t appendRecord(ChangelogRecord && record, bool sync)
+    off_t appendRecord(ChangelogRecord && record)
    {
        off_t result = plain_buf.count();
        writeIntBinary(computeRecordChecksum(record), plain_buf);
@ -96,23 +96,21 @@ public:

        entries_written++;

-        if (sync)
-            plain_buf.sync();
-        else
-            plain_buf.next();
        return result;
    }

    void truncateToLength(off_t new_length)
    {
-        flush();
+        plain_buf.next();
        plain_buf.truncate(new_length);
        plain_buf.seek(new_length, SEEK_SET);
    }

-    void flush()
+    void flush(bool force_fsync)
    {
-        plain_buf.sync();
+        plain_buf.next();
+        if (force_fsync)
+            plain_buf.sync();
    }

    uint64_t getEntriesWritten() const
@ -247,9 +245,14 @@ private:
    ReadBufferFromFile read_buf;
 };

-Changelog::Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, Poco::Logger * log_)
+Changelog::Changelog(
+    const std::string & changelogs_dir_,
+    uint64_t rotate_interval_,
+    bool force_sync_,
+    Poco::Logger * log_)
    : changelogs_dir(changelogs_dir_)
    , rotate_interval(rotate_interval_)
+    , force_sync(force_sync_)
    , log(log_)
 {
    namespace fs = std::filesystem;
@ -357,6 +360,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin

 void Changelog::rotate(uint64_t new_start_log_index)
 {
+    /// Flush previous log
+    flush();
+
    ChangelogFileDescription new_description;
    new_description.prefix = DEFAULT_PREFIX;
    new_description.from_log_index = new_start_log_index;
@ -387,7 +393,7 @@ ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_e
    return record;
 }

-void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool force_sync)
+void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry)
 {
    if (!current_writer)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records");
@ -398,14 +404,14 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool
    if (current_writer->getEntriesWritten() == rotate_interval)
        rotate(index);

-    auto offset = current_writer->appendRecord(buildRecord(index, log_entry), force_sync);
+    auto offset = current_writer->appendRecord(buildRecord(index, log_entry));
    if (!index_to_start_pos.try_emplace(index, offset).second)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index);

    logs[index] = makeClone(log_entry);
 }

-void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry, bool force_sync)
+void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
 {
    if (index_to_start_pos.count(index) == 0)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index);
@ -451,7 +457,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry, bool forc

    current_writer->setEntriesWritten(entries_written);

-    appendEntry(index, log_entry, force_sync);
+    appendEntry(index, log_entry);
 }

 void Changelog::compact(uint64_t up_to_log_index)
@ -540,7 +546,7 @@ nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index,
    return buf_out;
 }

-void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer, bool force_sync)
+void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer)
 {
    buffer.pos(0);
    int num_logs = buffer.get_int();
@ -555,23 +561,23 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer,

        LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local);
        if (i == 0 && logs.count(cur_index))
-            writeAt(cur_index, log_entry, force_sync);
+            writeAt(cur_index, log_entry);
        else
-            appendEntry(cur_index, log_entry, force_sync);
+            appendEntry(cur_index, log_entry);
    }
 }

 void Changelog::flush()
 {
-    current_writer->flush();
+    if (current_writer)
+        current_writer->flush(force_sync);
 }

 Changelog::~Changelog()
 {
    try
    {
-        if (current_writer)
-            current_writer->flush();
+        flush();
    }
    catch (...)
    {
--- a/src/Coordination/Changelog.h
+++ b/src/Coordination/Changelog.h
@ -63,17 +63,17 @@ class Changelog
 {

 public:
-    Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, Poco::Logger * log_);
+    Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_, bool force_sync_, Poco::Logger * log_);

    /// Read changelog from files on changelogs_dir_ skipping all entries before from_log_index
    /// Truncate broken entries, remove files after broken entries.
    void readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep);

-    /// Add entry to log with index. Call fsync if force_sync true.
-    void appendEntry(uint64_t index, const LogEntryPtr & log_entry, bool force_sync);
+    /// Add entry to log with index.
+    void appendEntry(uint64_t index, const LogEntryPtr & log_entry);

    /// Write entry at index and truncate all subsequent entries.
-    void writeAt(uint64_t index, const LogEntryPtr & log_entry, bool force_sync);
+    void writeAt(uint64_t index, const LogEntryPtr & log_entry);

    /// Remove log files with to_log_index <= up_to_log_index.
    void compact(uint64_t up_to_log_index);
@ -101,9 +101,9 @@ public:
    BufferPtr serializeEntriesToBuffer(uint64_t index, int32_t count);

    /// Apply entries from buffer overriding existing entries
-    void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer, bool force_sync);
+    void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer);

-    /// Fsync log to disk
+    /// Fsync latest log to disk and flush buffer
    void flush();

    uint64_t size() const
@ -124,6 +124,7 @@ private:
 private:
    const std::string changelogs_dir;
    const uint64_t rotate_interval;
+    const bool force_sync;
    Poco::Logger * log;

    std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@ -22,18 +22,19 @@ struct Settings;
    M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
    M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
    M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \
-    M(UInt64, reserved_log_items, 10000, "How many log items to store (don't remove during compaction)", 0) \
-    M(UInt64, snapshot_distance, 10000, "How many log items we have to collect to write new snapshot", 0) \
+    M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
+    M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
    M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
    M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \
    M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \
    M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
-    M(UInt64, rotate_log_storage_interval, 10000, "How many records will be stored in one log storage file", 0) \
+    M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
    M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
    M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
    M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
+    M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
    M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
-    M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)
+    M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0)

 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

--- a/src/Coordination/KeeperLogStore.cpp
+++ b/src/Coordination/KeeperLogStore.cpp
@ -5,9 +5,12 @@ namespace DB

 KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_)
    : log(&Poco::Logger::get("KeeperLogStore"))
-    , changelog(changelogs_path, rotate_interval_, log)
-    , force_sync(force_sync_)
+    , changelog(changelogs_path, rotate_interval_, force_sync_, log)
 {
+    if (force_sync_)
+        LOG_INFO(log, "force_sync enabled");
+    else
+        LOG_INFO(log, "force_sync disabled");
 }

 uint64_t KeeperLogStore::start_index() const
@ -38,7 +41,7 @@ uint64_t KeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
 {
    std::lock_guard lock(changelog_lock);
    uint64_t idx = changelog.getNextEntryIndex();
-    changelog.appendEntry(idx, entry, force_sync);
+    changelog.appendEntry(idx, entry);
    return idx;
 }

@ -46,7 +49,7 @@ uint64_t KeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
 void KeeperLogStore::write_at(uint64_t index, nuraft::ptr<nuraft::log_entry> & entry)
 {
    std::lock_guard lock(changelog_lock);
-    changelog.writeAt(index, entry, force_sync);
+    changelog.writeAt(index, entry);
 }

 nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> KeeperLogStore::log_entries(uint64_t start, uint64_t end)
@ -93,7 +96,7 @@ bool KeeperLogStore::flush()
 void KeeperLogStore::apply_pack(uint64_t index, nuraft::buffer & pack)
 {
    std::lock_guard lock(changelog_lock);
-    changelog.applyEntriesFromBuffer(index, pack, force_sync);
+    changelog.applyEntriesFromBuffer(index, pack);
 }

 uint64_t KeeperLogStore::size() const
@ -102,4 +105,10 @@ uint64_t KeeperLogStore::size() const
    return changelog.size();
 }

+void KeeperLogStore::end_of_append_batch(uint64_t /*start_index*/, uint64_t /*count*/)
+{
+    std::lock_guard lock(changelog_lock);
+    changelog.flush();
+}
+
 }
--- a/src/Coordination/KeeperLogStore.h
+++ b/src/Coordination/KeeperLogStore.h
@ -42,11 +42,12 @@ public:

    uint64_t size() const;

+    void end_of_append_batch(uint64_t start_index, uint64_t count) override;
+
 private:
    mutable std::mutex changelog_lock;
    Poco::Logger * log;
    Changelog changelog;
-    bool force_sync;
 };

 }
--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@ -24,6 +24,7 @@ namespace ErrorCodes
    extern const int RAFT_ERROR;
    extern const int NO_ELEMENTS_IN_CONFIG;
    extern const int SUPPORT_IS_DISABLED;
+    extern const int LOGICAL_ERROR;
 }

 namespace
@ -73,7 +74,6 @@ KeeperServer::KeeperServer(
                        config.getString("keeper_server.snapshot_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/snapshots"),
                        coordination_settings))
    , state_manager(nuraft::cs_new<KeeperStateManager>(server_id, "keeper_server", config, coordination_settings))
-    , responses_queue(responses_queue_)
    , log(&Poco::Logger::get("KeeperServer"))
 {
    if (coordination_settings->quorum_reads)
@ -111,7 +111,7 @@ void KeeperServer::startup()
    params.auto_forwarding_ = coordination_settings->auto_forwarding;
    params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;

-    params.return_method_ = nuraft::raft_params::blocking;
+    params.return_method_ = nuraft::raft_params::async_handler;

    nuraft::asio_service::options asio_opts{};
    if (state_manager->isSecure())
@ -222,75 +222,26 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord

 }

-void KeeperServer::putRequest(const KeeperStorage::RequestForSession & request_for_session)
+
+void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & request_for_session)
 {
-    auto [session_id, request] = request_for_session;
-    if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
-    {
-        state_machine->processReadRequest(request_for_session);
-    }
-    else
-    {
-        std::vector<nuraft::ptr<nuraft::buffer>> entries;
-        entries.push_back(getZooKeeperLogEntry(session_id, request));
+    if (!request_for_session.request->isReadRequest())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot process non-read request locally");

-        std::lock_guard lock(append_entries_mutex);
-
-        auto result = raft_instance->append_entries(entries);
-        if (!result->get_accepted())
-        {
-            KeeperStorage::ResponsesForSessions responses;
-            auto response = request->makeResponse();
-            response->xid = request->xid;
-            response->zxid = 0;
-            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
-        }
-
-        if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
-        {
-            KeeperStorage::ResponsesForSessions responses;
-            auto response = request->makeResponse();
-            response->xid = request->xid;
-            response->zxid = 0;
-            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
-        }
-        else if (result->get_result_code() != nuraft::cmd_result_code::OK)
-            throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
-    }
+    state_machine->processReadRequest(request_for_session);
 }

-int64_t KeeperServer::getSessionID(int64_t session_timeout_ms)
+RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
 {
-    /// Just some sanity check. We don't want to make a lot of clients wait with lock.
-    if (active_session_id_requests > 10)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Too many concurrent SessionID requests already in flight");

-    ++active_session_id_requests;
-    SCOPE_EXIT({ --active_session_id_requests; });
+    std::vector<nuraft::ptr<nuraft::buffer>> entries;
+    for (const auto & [session_id, request] : requests_for_sessions)
+        entries.push_back(getZooKeeperLogEntry(session_id, request));

-    auto entry = nuraft::buffer::alloc(sizeof(int64_t));
-    /// Just special session request
-    nuraft::buffer_serializer bs(entry);
-    bs.put_i64(session_timeout_ms);
-
-    std::lock_guard lock(append_entries_mutex);
-
-    auto result = raft_instance->append_entries({entry});
-
-    if (!result->get_accepted())
-        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
-
-    if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        throw Exception(ErrorCodes::RAFT_ERROR, "session_id request failed to RAFT");
-
-    auto resp = result->get();
-    if (resp == nullptr)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr as session_id");
-
-    nuraft::buffer_serializer bs_resp(resp);
-    return bs_resp.get_i64();
+    {
+        std::lock_guard lock(append_entries_mutex);
+        return raft_instance->append_entries(entries);
+    }
 }

 bool KeeperServer::isLeader() const
--- a/Show More
+++ b/Show More