diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index 064787fb64e..0c7465ec7a5 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -322,8 +322,14 @@ public: if (offset_is_whole_number_of_hours_everytime) return (UInt32(t) / 60) % 60; - UInt32 date = find(t).date; - return (UInt32(t) - date) / 60 % 60; + /// Take into account a DST change within this day, + /// as well as special time zones with a non-whole-hour offset, such as 'Australia/Lord_Howe'. + DayNum index = findIndex(t); + UInt32 res = t - lut[index].date; + if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change) + res += lut[index].amount_of_offset_change; + + return res / 60 % 60; } inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; } diff --git a/base/common/defines.h b/base/common/defines.h index 845a53179ef..367bdd64234 100644 --- a/base/common/defines.h +++ b/base/common/defines.h @@ -1,5 +1,20 @@ #pragma once +/// __has_feature is supported only by clang. +/// +/// But libcxx/libcxxabi overrides it to 0, +/// thus the checks for __has_feature will be wrong. +/// +/// NOTE: +/// - __has_feature cannot be simply undefined, +/// since this will break if some C++ header is included after +/// including <common/defines.h> +/// - it should not have fallback to 0, +/// since this may create false-positive detection (common problem) +#if defined(__clang__) && defined(__has_feature) +# define ch_has_feature __has_feature +#endif + #if defined(_MSC_VER) # if !defined(likely) # define likely(x) (x) @@ -32,8 +47,8 @@ /// Check for presence of address sanitizer #if !defined(ADDRESS_SANITIZER) -# if defined(__has_feature) -# if __has_feature(address_sanitizer) +# if defined(ch_has_feature) +# if ch_has_feature(address_sanitizer) # define ADDRESS_SANITIZER 1 # endif # elif defined(__SANITIZE_ADDRESS__) @@ -42,8 +57,8 @@ #endif #if !defined(THREAD_SANITIZER) -# if defined(__has_feature) -# if __has_feature(thread_sanitizer) +# if defined(ch_has_feature) +# if ch_has_feature(thread_sanitizer) # define THREAD_SANITIZER 1 # endif # elif defined(__SANITIZE_THREAD__) @@ -52,8 +67,8 @@ #endif #if !defined(MEMORY_SANITIZER) -# if defined(__has_feature) -# if __has_feature(memory_sanitizer) +# if defined(ch_has_feature) +# if ch_has_feature(memory_sanitizer) # define MEMORY_SANITIZER 1 # endif # elif defined(__MEMORY_SANITIZER__) diff --git a/base/common/phdr_cache.cpp b/base/common/phdr_cache.cpp index 4f6a066adab..49d566dac19 100644 --- a/base/common/phdr_cache.cpp +++ b/base/common/phdr_cache.cpp @@ -15,11 +15,11 @@ #endif #define __msan_unpoison(X, Y) // NOLINT -#if defined(__has_feature) -# if __has_feature(memory_sanitizer) -# undef __msan_unpoison -# include <sanitizer/msan_interface.h> -# endif +#if defined(ch_has_feature) +# if ch_has_feature(memory_sanitizer) +# undef __msan_unpoison +# include <sanitizer/msan_interface.h> +# endif #endif #include diff --git a/base/mysqlxx/Connection.cpp b/base/mysqlxx/Connection.cpp index 8a15115cb06..2dbbc0c73f3 100644 --- a/base/mysqlxx/Connection.cpp +++ b/base/mysqlxx/Connection.cpp @@ -51,10 +51,11 @@ Connection::Connection( const char* ssl_key, unsigned timeout, unsigned rw_timeout, - bool enable_local_infile) + bool enable_local_infile, + bool opt_reconnect) : Connection() { - connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile); + connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect); }
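The DateLUTImpl change above is meant to make toMinute() correct on days with a DST transition and for time zones whose UTC offset is not a whole number of hours. A minimal way to sanity-check it, assuming a server built with this patch, is a query against the time zone named in the comment; the expected value is simply the wall-clock minute:

``` sql
-- Lord Howe Island uses a UTC+10:30 standard offset (UTC+11:00 during DST),
-- so the result must reflect the wall-clock minute rather than assume a whole-hour offset.
SELECT toMinute(toDateTime('2021-06-01 12:40:00', 'Australia/Lord_Howe'));
-- Expected result: 40
```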
Connection::Connection(const std::string & config_name) @@ -80,7 +81,8 @@ void Connection::connect(const char* db, const char * ssl_key, unsigned timeout, unsigned rw_timeout, - bool enable_local_infile) + bool enable_local_infile, + bool opt_reconnect) { if (is_connected) disconnect(); @@ -104,9 +106,8 @@ void Connection::connect(const char* db, if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg)) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); - /// Enables auto-reconnect. - bool reconnect = true; - if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast(&reconnect))) + /// See C API Developer Guide: Automatic Reconnection Control + if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast(&opt_reconnect))) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); /// Specifies particular ssl key and certificate if it needs diff --git a/base/mysqlxx/Connection.h b/base/mysqlxx/Connection.h index ca67db0e0c6..65955136eb1 100644 --- a/base/mysqlxx/Connection.h +++ b/base/mysqlxx/Connection.h @@ -14,6 +14,8 @@ /// Disable LOAD DATA LOCAL INFILE because it is insecure #define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false +/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html +#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true namespace mysqlxx @@ -76,7 +78,8 @@ public: const char * ssl_key = "", unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, - bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); + bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE, + bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT); /// Creates connection. Can be used if Poco::Util::Application is using. /// All settings will be got from config_name section of configuration. @@ -96,7 +99,8 @@ public: const char* ssl_key, unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, - bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); + bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE, + bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT); void connect(const std::string & config_name) { @@ -112,6 +116,7 @@ public: std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", ""); std::string ssl_key = cfg.getString(config_name + ".ssl_key", ""); bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); + bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT); unsigned timeout = cfg.getInt(config_name + ".connect_timeout", @@ -135,7 +140,8 @@ public: ssl_key.c_str(), timeout, rw_timeout, - enable_local_infile); + enable_local_infile, + opt_reconnect); } /// If MySQL connection was established. diff --git a/base/mysqlxx/Exception.h b/base/mysqlxx/Exception.h index eaeb3565af1..48cd0997b94 100644 --- a/base/mysqlxx/Exception.h +++ b/base/mysqlxx/Exception.h @@ -26,6 +26,15 @@ struct ConnectionFailed : public Exception }; +/// Connection to MySQL server was lost +struct ConnectionLost : public Exception +{ + ConnectionLost(const std::string & msg, int code = 0) : Exception(msg, code) {} + const char * name() const throw() override { return "mysqlxx::ConnectionLost"; } + const char * className() const throw() override { return "mysqlxx::ConnectionLost"; } +}; + + /// Erroneous query. 
struct BadQuery : public Exception { diff --git a/base/mysqlxx/Pool.cpp b/base/mysqlxx/Pool.cpp index 2cb3e62db84..cf8b3cf9267 100644 --- a/base/mysqlxx/Pool.cpp +++ b/base/mysqlxx/Pool.cpp @@ -10,7 +10,6 @@ #include -#include #include @@ -41,7 +40,9 @@ void Pool::Entry::decrementRefCount() Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name, unsigned default_connections_, unsigned max_connections_, const char * parent_config_name_) - : default_connections(default_connections_), max_connections(max_connections_) + : logger(Poco::Logger::get("mysqlxx::Pool")) + , default_connections(default_connections_) + , max_connections(max_connections_) { server = cfg.getString(config_name + ".host"); @@ -78,6 +79,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)); + + opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", + cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)); } else { @@ -96,6 +100,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co enable_local_infile = cfg.getBool( config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); + + opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT); } connect_timeout = cfg.getInt(config_name + ".connect_timeout", @@ -125,20 +131,30 @@ Pool::Entry Pool::get() initialize(); for (;;) { + logger.trace("(%s): Iterating through existing MySQL connections", getDescription()); + for (auto & connection : connections) { if (connection->ref_count == 0) return Entry(connection, this); } + logger.trace("(%s): Trying to allocate a new connection.", getDescription()); if (connections.size() < static_cast(max_connections)) { Connection * conn = allocConnection(); if (conn) return Entry(conn, this); + + logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription()); + } + else + { + logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription()); } lock.unlock(); + logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); lock.lock(); } @@ -162,8 +178,7 @@ Pool::Entry Pool::tryGet() if (res.tryForceConnected()) /// Tries to reestablish connection as well return res; - auto & logger = Poco::Util::Application::instance().logger(); - logger.information("Idle connection to mysql server cannot be recovered, dropping it."); + logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription()); /// This one is disconnected, cannot be reestablished and so needs to be disposed of. 
connection_it = connections.erase(connection_it); @@ -186,6 +201,8 @@ Pool::Entry Pool::tryGet() void Pool::removeConnection(Connection* connection) { + logger.trace("(%s): Removing connection.", getDescription()); + std::lock_guard lock(mutex); if (connection) { @@ -210,8 +227,6 @@ void Pool::Entry::forceConnected() const if (data == nullptr) throw Poco::RuntimeException("Tried to access NULL database connection."); - Poco::Util::Application & app = Poco::Util::Application::instance(); - bool first = true; while (!tryForceConnected()) { @@ -220,7 +235,7 @@ void Pool::Entry::forceConnected() const else sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); - app.logger().information("MYSQL: Reconnecting to " + pool->description); + pool->logger.debug("Entry: Reconnecting to MySQL server %s", pool->description); data->conn.connect( pool->db.c_str(), pool->server.c_str(), @@ -233,7 +248,8 @@ void Pool::Entry::forceConnected() const pool->ssl_key.c_str(), pool->connect_timeout, pool->rw_timeout, - pool->enable_local_infile); + pool->enable_local_infile, + pool->opt_reconnect); } } @@ -242,18 +258,22 @@ bool Pool::Entry::tryForceConnected() const { auto * const mysql_driver = data->conn.getDriver(); const auto prev_connection_id = mysql_thread_id(mysql_driver); + + pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id); if (data->conn.ping()) /// Attempts to reestablish lost connection { const auto current_connection_id = mysql_thread_id(mysql_driver); if (prev_connection_id != current_connection_id) { - auto & logger = Poco::Util::Application::instance().logger(); - logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu", - prev_connection_id, current_connection_id); + pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. 
Connection id changed: %lu -> %lu", + current_connection_id, prev_connection_id, current_connection_id); } + + pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id); return true; } + pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id); return false; } @@ -274,15 +294,13 @@ void Pool::initialize() Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time) { - Poco::Util::Application & app = Poco::Util::Application::instance(); - - std::unique_ptr conn(new Connection); + std::unique_ptr conn_ptr{new Connection}; try { - app.logger().information("MYSQL: Connecting to " + description); + logger.debug("Connecting to %s", description); - conn->conn.connect( + conn_ptr->conn.connect( db.c_str(), server.c_str(), user.c_str(), @@ -294,29 +312,29 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time) ssl_key.c_str(), connect_timeout, rw_timeout, - enable_local_infile); + enable_local_infile, + opt_reconnect); } catch (mysqlxx::ConnectionFailed & e) { + logger.error(e.what()); + if ((!was_successful && !dont_throw_if_failed_first_time) || e.errnum() == ER_ACCESS_DENIED_ERROR || e.errnum() == ER_DBACCESS_DENIED_ERROR || e.errnum() == ER_BAD_DB_ERROR) { - app.logger().error(e.what()); throw; } else { - app.logger().error(e.what()); return nullptr; } } + connections.push_back(conn_ptr.get()); was_successful = true; - auto * connection = conn.release(); - connections.push_back(connection); - return connection; + return conn_ptr.release(); } } diff --git a/base/mysqlxx/Pool.h b/base/mysqlxx/Pool.h index 83b00e0081a..b6189663f55 100644 --- a/base/mysqlxx/Pool.h +++ b/base/mysqlxx/Pool.h @@ -6,6 +6,8 @@ #include #include +#include + #include @@ -165,19 +167,21 @@ public: unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, - unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE) - : default_connections(default_connections_), max_connections(max_connections_), - db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), - connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {} + unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE, + bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT) + : logger(Poco::Logger::get("mysqlxx::Pool")), default_connections(default_connections_), + max_connections(max_connections_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), + connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_), + opt_reconnect(opt_reconnect_) {} Pool(const Pool & other) - : default_connections{other.default_connections}, + : logger(other.logger), default_connections{other.default_connections}, max_connections{other.max_connections}, db{other.db}, server{other.server}, user{other.user}, password{other.password}, port{other.port}, socket{other.socket}, connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout}, - enable_local_infile{other.enable_local_infile} + enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect) {} Pool & operator=(const Pool &) = delete; @@ -201,6 +205,8 @@ public: void removeConnection(Connection * connection); protected: + Poco::Logger & logger; + /// Number of MySQL connections which are created at launch. 
unsigned default_connections; /// Maximum possible number of connections @@ -231,6 +237,7 @@ private: std::string ssl_cert; std::string ssl_key; bool enable_local_infile; + bool opt_reconnect; /// True if connection was established at least once. bool was_successful{false}; diff --git a/base/mysqlxx/PoolWithFailover.cpp b/base/mysqlxx/PoolWithFailover.cpp index 5bee75aab1b..5e9f70f4ac1 100644 --- a/base/mysqlxx/PoolWithFailover.cpp +++ b/base/mysqlxx/PoolWithFailover.cpp @@ -1,3 +1,8 @@ +#include +#include +#include +#include + #include @@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con std::make_shared(config_, replica_name, default_connections_, max_connections_, config_name_.c_str())); } } + + /// PoolWithFailover objects are stored in a cache inside PoolFactory. + /// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES} + /// which triggers massive re-constructing of connection pools. + /// The state of PRNGs like std::mt19937 is considered to be quite heavy + /// thus here we attempt to optimize its construction. + static thread_local std::mt19937 rnd_generator( + std::hash{}(std::this_thread::get_id()) + std::clock()); + for (auto & [_, replicas] : replicas_by_priority) + { + if (replicas.size() > 1) + std::shuffle(replicas.begin(), replicas.end(), rnd_generator); + } } else { diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index f3485c54edc..d4514c3e988 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -1,11 +1,16 @@ #if __has_include() +#include #include #else +#include #include #endif +#include + #include #include +#include namespace mysqlxx @@ -57,8 +62,24 @@ void Query::reset() void Query::executeImpl() { std::string query_string = query_buf.str(); - if (mysql_real_query(conn->getDriver(), query_string.data(), query_string.size())) - throw BadQuery(errorMessage(conn->getDriver()), mysql_errno(conn->getDriver())); + + MYSQL* mysql_driver = conn->getDriver(); + + auto & logger = Poco::Logger::get("mysqlxx::Query"); + logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver)); + if (mysql_real_query(mysql_driver, query_string.data(), query_string.size())) + { + const auto err_no = mysql_errno(mysql_driver); + switch (err_no) + { + case CR_SERVER_GONE_ERROR: + [[fallthrough]]; + case CR_SERVER_LOST: + throw ConnectionLost(errorMessage(mysql_driver), err_no); + default: + throw BadQuery(errorMessage(mysql_driver), err_no); + } + } } UseQueryResult Query::use() diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index d9ccd1a9ac6..fea1f8b4c97 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -32,7 +32,10 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") - # debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is + set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) + set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) + + # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is # filled from the debian/changelog or current time. 
# # - 4.0+ ccache always includes this environment variable into the hash @@ -48,9 +51,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") - else() - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) endif() else () message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") diff --git a/contrib/NuRaft b/contrib/NuRaft index 7adf7ae33e7..9a0d78de4b9 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793 +Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80 diff --git a/contrib/boost b/contrib/boost index 48f40ebb539..ee24fa55bc4 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79 +Subproject commit ee24fa55bc46e4d2ce7d0d052cc5a0d9b1be8c36 diff --git a/contrib/brotli b/contrib/brotli index 5805f99a533..63be8a99401 160000 --- a/contrib/brotli +++ b/contrib/brotli @@ -1 +1 @@ -Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65 +Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2 diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index e22f4593c02..4c5f584de9d 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -2,6 +2,8 @@ set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c) set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c) set(SRCS + ${BROTLI_SOURCE_DIR}/enc/command.c + ${BROTLI_SOURCE_DIR}/enc/fast_log.c ${BROTLI_SOURCE_DIR}/dec/bit_reader.c ${BROTLI_SOURCE_DIR}/dec/state.c ${BROTLI_SOURCE_DIR}/dec/huffman.c @@ -26,6 +28,9 @@ set(SRCS ${BROTLI_SOURCE_DIR}/enc/memory.c ${BROTLI_SOURCE_DIR}/common/dictionary.c ${BROTLI_SOURCE_DIR}/common/transform.c + ${BROTLI_SOURCE_DIR}/common/platform.c + ${BROTLI_SOURCE_DIR}/common/context.c + ${BROTLI_SOURCE_DIR}/common/constants.c ) add_library(brotli ${SRCS}) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index b6fcdd7f7d2..1bfc91ecd92 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -70,6 +70,7 @@ function start_server --path "$FASTTEST_DATA" --user_files_path "$FASTTEST_DATA/user_files" --top_level_domains_path "$FASTTEST_DATA/top_level_domains" + --test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination" ) clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" & server_pid=$! @@ -355,7 +356,6 @@ function run_tests # JSON functions 01666_blns - 01674_htm_xml_coarse_parse ) (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" @@ -375,7 +375,7 @@ function run_tests stop_server ||: # Clean the data so that there is no interference from the previous test run. 
- rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||: + rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||: start_server diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 4963ff0094d..938d8d45ffd 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -18,7 +18,8 @@ RUN apt-get update \ curl \ tar \ krb5-user \ - iproute2 + iproute2 \ + lsof RUN rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml index 49e285b5515..1601d217a25 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml @@ -1,11 +1,11 @@ version: '2.3' services: zoo1: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 restart: always environment: ZOO_TICK_TIME: 500 - ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 ZOO_MY_ID: 1 JVMFLAGS: -Dzookeeper.forceSync=no volumes: @@ -16,11 +16,11 @@ services: source: ${ZK_DATA_LOG1:-} target: /datalog zoo2: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 restart: always environment: ZOO_TICK_TIME: 500 - ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888 ZOO_MY_ID: 2 JVMFLAGS: -Dzookeeper.forceSync=no volumes: @@ -31,11 +31,11 @@ services: source: ${ZK_DATA_LOG2:-} target: /datalog zoo3: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 restart: always environment: ZOO_TICK_TIME: 500 - ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 ZOO_MY_ID: 3 JVMFLAGS: -Dzookeeper.forceSync=no volumes: diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 2b19a5e75a8..4d862cf987e 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -358,6 +358,8 @@ mkdir analyze analyze/tmp ||: build_log_column_definitions # Split the raw test output into files suitable for analysis. +# To debug calculations only for a particular test, substitute a suitable +# wildcard here, e.g. `for test_file in modulo-raw.tsv`. for test_file in *-raw.tsv do test_name=$(basename "$test_file" "-raw.tsv") @@ -467,7 +469,13 @@ create view broken_queries as create table query_run_metrics_for_stats engine File( TSV, -- do not add header -- will parse with grep 'analyze/query-run-metrics-for-stats.tsv') - as select test, query_index, 0 run, version, metric_values + as select test, query_index, 0 run, version, + -- For debugging, add a filter for a particular metric like this: + -- arrayFilter(m, n -> n = 'client_time', metric_values, metric_names) + -- metric_values + -- Note that further reporting may break, because the metric names are + -- not filtered. + metric_values from query_run_metric_arrays where (test, query_index) not in broken_queries order by test, query_index, run, version @@ -585,8 +593,19 @@ create view query_metric_stats as -- Main statistics for queries -- query time as reported in query log. 
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') as select - abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail, - abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show, + -- It is important to have a non-strict inequality with stat_threshold + -- here. The randomization distribution is actually discrete, and when + -- the number of runs is small, the quantile we need (e.g. 0.99) turns + -- out to be the maximum value of the distribution. We can also hit this + -- maximum possible value with our test run, and this obviously means + -- that we have observed the difference to the best precision possible + -- for the given number of runs. If we used a strict inequality here, we + -- would miss such cases. This happened in the wild and led to some + -- uncaught regressions, because for the default 7 runs we do for PRs, + -- the randomization distribution has only 16 values, so the max quantile + -- is actually 0.9375. + abs(diff) > report_threshold and abs(diff) >= stat_threshold as changed_fail, + abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show, not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail, not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show, diff --git a/docker/test/performance-comparison/config/config.d/user_files.xml b/docker/test/performance-comparison/config/config.d/user_files.xml new file mode 100644 index 00000000000..6611e986a90 --- /dev/null +++ b/docker/test/performance-comparison/config/config.d/user_files.xml @@ -0,0 +1,7 @@ + + + /var/lib/clickhouse/user_files/ + + + users.xml + \ No newline at end of file diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index f3609bcfcdb..41bc7f777bf 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -19,4 +19,9 @@ 12 + + + 1 + + diff --git a/docker/test/performance-comparison/eqmed.sql b/docker/test/performance-comparison/eqmed.sql index 139f0758798..d0111550ee6 100644 --- a/docker/test/performance-comparison/eqmed.sql +++ b/docker/test/performance-comparison/eqmed.sql @@ -1,4 +1,6 @@ --- input is table(test text, query text, run UInt32, version int, metrics Array(float)) +-- The input is table(test text, query text, run UInt32, version UInt8, metrics Array(float)). +-- Run like this: +-- clickhouse-local --queries-file eqmed.sql -S 'test text, query text, run UInt32, version UInt8, metrics Array(float)' --file analyze/tmp/modulo_0.tsv select arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded, arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded, @@ -8,14 +10,19 @@ select from ( -- quantiles of randomization distributions + -- note that for a small number of runs, the exact quantile might not make + -- sense, because the last possible value of the randomization distribution + -- might take a larger percentage of the distribution (i.e. the distribution + -- actually has discrete values, and the last step can be large).
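The 0.9375 figure mentioned in the comments above follows from simple arithmetic: if the randomization distribution has only 16 distinct values, the largest quantile that still lies below the maximum is 15 / 16 = 0.9375, so any higher requested quantile (such as 0.99) resolves to the maximum of the distribution, and a strict comparison against it could never be satisfied by an observed difference equal to that maximum.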
select quantileExactForEach(0.99)( arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d ) threshold - ---- uncomment to see what the distribution is really like - --, uniqExact(d.1) u + ---- Uncomment to see what the distribution is really like. This debug + ---- code only works for single (the first) metric. + --, uniqExact(d[1]) u --, arraySort(x->x.1, -- arrayZip( - -- (sumMap([d.1], [1]) as f).1, + -- (sumMap([d[1]], [1]) as f).1, -- f.2)) full_histogram from ( diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index dc1e4db4477..2e1c546ce8c 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -81,6 +81,8 @@ clickhouse-client --query "SHOW TABLES FROM test" ./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt stop +# TODO remove me when persistent snapshots will be ready +rm -fr /var/lib/clickhouse/coordination ||: start clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md index edb6586ee7d..5e560b9325d 100644 --- a/docs/_description_templates/template-data-type.md +++ b/docs/_description_templates/template-data-type.md @@ -26,4 +26,4 @@ The name of an additional section can be any, for example, **Usage**. - [link](#) -[Original article](https://clickhouse.tech/docs/en/data_types//) +[Original article](https://clickhouse.tech/docs/en/data-types//) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index c519d6bb136..fb1df62bb15 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -38,20 +38,20 @@ SETTINGS Required parameters: -- `kafka_broker_list` – A comma-separated list of brokers (for example, `localhost:9092`). -- `kafka_topic_list` – A list of Kafka topics. -- `kafka_group_name` – A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere. -- `kafka_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). +- `kafka_topic_list` — A list of Kafka topics. +- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere. +- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. Optional parameters: -- `kafka_row_delimiter` – Delimiter character, which ends the message. -- `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` – The number of consumers per table. Default: `1`. 
Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. -- `kafka_max_block_size` - The maximum batch size (in messages) for poll (default: `max_block_size`). -- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). -- `kafka_commit_every_batch` - Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). -- `kafka_thread_per_consumer` - Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise - rows from several consumers squashed to form one block). +- `kafka_row_delimiter` — Delimiter character, which ends the message. +- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. +- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. +- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`). +- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). +- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). +- `kafka_thread_per_consumer` — Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Examples: diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index cda16c2a226..2acec40ef02 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -66,7 +66,8 @@ SELECT * FROM file_engine_table ## Usage in ClickHouse-local {#usage-in-clickhouse-local} -In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. +In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`). 
+ **Example:** ``` bash diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index b5ca23eddb9..c9b74a84a54 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -5,7 +5,7 @@ toc_title: Brown University Benchmark # Brown University Benchmark -MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/). +`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/). Download the data: ``` @@ -153,7 +153,7 @@ ORDER BY dt, hr; --- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O? +-- Q1.4: Over 1 month, how often was each server blocked on disk I/O? SELECT machine_name, COUNT(*) AS spikes @@ -301,7 +301,7 @@ WHERE event_type = 'temperature' AND log_time >= '2019-11-29 17:00:00.000'; --- Q3.4: Over the past 6 months, how frequently was each door opened? +-- Q3.4: Over the past 6 months, how frequently were each door opened? SELECT device_name, device_floor, @@ -412,3 +412,5 @@ ORDER BY yr, ``` The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). + +[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md new file mode 100644 index 00000000000..76effdd4c62 --- /dev/null +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -0,0 +1,133 @@ +--- +toc_priority: 21 +toc_title: Cell Towers +--- + +# Cell Towers + +This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. + +As of 2021 it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc). + +OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up to date version of the dataset is available to download after sign in. + + +## Get the Dataset + +Download the snapshot of the dataset from Feb 2021: [https://datasets.clickhouse.tech/cell_towers.csv.xz] (729 MB). 
+ +Optionally validate the integrity: +``` +md5sum cell_towers.csv.xz +8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz +``` + +Decompress it with the following command: +``` +xz -d cell_towers.csv.xz +``` + +Create a table: + +``` +CREATE TABLE cell_towers +( + radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), + mcc UInt16, + net UInt16, + area UInt16, + cell UInt64, + unit Int16, + lon Float64, + lat Float64, + range UInt32, + samples UInt32, + changeable UInt8, + created DateTime, + updated DateTime, + averageSignal UInt8 +) +ENGINE = MergeTree ORDER BY (radio, mcc, net, created); +``` + +Insert the dataset: +``` +clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv +``` + + +## Run some queries + +Number of cell towers by type: +``` +SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC + +┌─radio─┬────────c─┐ +│ UMTS │ 20686487 │ +│ LTE │ 12101148 │ +│ GSM │ 9931312 │ +│ CDMA │ 556344 │ +│ NR │ 867 │ +└───────┴──────────┘ + +5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.) +``` + +Cell towers by mobile country code (MCC): +``` +SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 + +┌─mcc─┬─count()─┐ +│ 310 │ 5024650 │ +│ 262 │ 2622423 │ +│ 250 │ 1953176 │ +│ 208 │ 1891187 │ +│ 724 │ 1836150 │ +│ 404 │ 1729151 │ +│ 234 │ 1618924 │ +│ 510 │ 1353998 │ +│ 440 │ 1343355 │ +│ 311 │ 1332798 │ +└─────┴─────────┘ + +10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.) +``` + +See the dictionary here: [https://en.wikipedia.org/wiki/Mobile_country_code](https://en.wikipedia.org/wiki/Mobile_country_code). + +So, the top countries are USA, Germany and Russia. + +You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts/) in ClickHouse to decode these values. 
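One possible shape for such a dictionary, assuming the MCC-to-country mapping has been exported to a CSV file in the user_files directory (the file name, dictionary name and attribute names below are illustrative, not part of the dataset):

``` sql
CREATE DICTIONARY mcc_countries
(
    mcc UInt64,
    country String
)
PRIMARY KEY mcc
-- The CSV file is assumed to contain an `mcc,country` header and one row per code.
SOURCE(FILE(path '/var/lib/clickhouse/user_files/mcc_countries.csv' format 'CSVWithNames'))
LAYOUT(HASHED())
LIFETIME(0);

-- Decode the most frequent code from the previous query.
SELECT dictGet('mcc_countries', 'country', toUInt64(310)) AS country;
```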
+ + +### Example of using `pointInPolygon` function + +Create a table where we will store polygons: + +``` +CREATE TEMPORARY TABLE moscow (polygon Array(Tuple(Float64, Float64))); +``` + +This is a rough shape of Moscow (without "new Moscow"): + +``` +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), (37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), (37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), (37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), (37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), (37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), (37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), (37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), (37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), (37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), (37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), (37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), (37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), (37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), (37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), 
(37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), (37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), (37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), (37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), (37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), (37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), (37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), (37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), (37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), (37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), (37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), (37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), (37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), (37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), (37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), 
(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), (37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), (37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), (37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), (37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), (37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), (37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), (37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), (37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), (37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), (37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), (37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), (37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), (37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), (37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), 
(37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), (37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), (37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), (37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), (37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), (37.84172564285271, 55.78000432402266)]); +``` + +Check how many cell towers are in Moscow: + +``` +SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) + +┌─count()─┐ +│ 310463 │ +└─────────┘ + +1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) +``` + +The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). + +Although you cannot create temporary tables there. + +[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/cell-towers/) diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 72f44d8caf1..53007c33306 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -20,5 +20,6 @@ The list of documented datasets: - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) - [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) +- [Cell Towers](../../getting-started/example-datasets/cell-towers.md) [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 6e46cddba52..83673cdceb6 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -15,17 +15,9 @@ This dataset can be obtained in two ways: Downloading data: ``` bash -for s in `seq 1987 2018` -do -for m in `seq 1 12` -do -wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip -done -done +echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip | xargs -P10 wget --no-check-certificate --continue ``` -(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh ) - Creating a table: ``` sql @@ -145,12 +137,14 @@ ORDER BY (Carrier, FlightDate) SETTINGS index_granularity = 8192; ``` -Loading data: +Loading data with multiple threads: ``` bash 
-$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` +(if you will have memory shortage or other issues on your server, remove the `-P $(nproc)` part) + ## Download of Prepared Partitions {#download-of-prepared-partitions} ``` bash diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 310286e3d44..18533cfc6c2 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- For successful requests that don’t return a data table, an empty response body is returned. -You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. -If you specified `compress=1` in the URL, the server compresses the data it sends you. -If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method. +## Compression {#compression} -You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods. +You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed. -You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. -Examples of sending data with compression: +If you specify `compress=1` in the URL, the server will compress the data it sends to you. 
If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method. -``` bash -#Sending data to the server: -$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip' +You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): -#Sending data to the client: -$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' -``` +- `gzip` +- `br` +- `deflate` +- `xz` + +To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. +In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. !!! note "Note" Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. +**Examples** + +``` bash +# Sending compressed data to the server +$ echo "SELECT 1" | gzip -c | \ + curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' +``` + +``` bash +# Receiving compressed data from the server +$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \ + -H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3' +$ zcat result.gz +0 +1 +2 +``` + +## Default Database {#default-database} + You can use the ‘database’ URL parameter or the ‘X-ClickHouse-Database’ header to specify the default database. ``` bash diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 7b096b76f75..ec7e4239a9d 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -8,18 +8,21 @@ toc_title: Caches When performing queries, ClichHouse uses different caches. Main cache types: + - `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. - `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. Additional cache types: -- DNS cache -- [regexp](../interfaces/formats.md#data-format-regexp) cache -- compiled expressions cache -- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache -- [dictionaries data cache](../sql-reference/dictionaries/index.md) + +- DNS cache. +- [Regexp](../interfaces/formats.md#data-format-regexp) cache. +- Compiled expressions cache. +- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache. +- [Dictionaries](../sql-reference/dictionaries/index.md) data cache. Indirectly used: -- OS page cache + +- OS page cache. To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements. 
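A minimal illustrative sketch of the `SYSTEM DROP ... CACHE` statements mentioned above (statement names assumed from the system-statements reference; run with sufficient privileges):

``` sql
-- Drop the two main caches listed above.
SYSTEM DROP MARK CACHE;
SYSTEM DROP UNCOMPRESSED CACHE;
-- The DNS cache from the "Additional cache types" list can be dropped the same way.
SYSTEM DROP DNS CACHE;
```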
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 6440f09bb40..3c343e09fd3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1104,7 +1104,7 @@ The maximum number of replicas for each shard when executing a query. In limited - the sampling key is an expression that is expensive to calculate - the cluster's latency distribution has a long tail, so that querying more servers increases the query's overall latency -In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. +In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details. ## compile {#compile} @@ -1956,8 +1956,8 @@ Default value: 16. **See Also** -- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine -- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine. +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine. ## validate_polygons {#validate_polygons} @@ -2658,8 +2658,6 @@ Result: Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. -[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) - ## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists} Allows to select data from a file engine table without file. @@ -2679,3 +2677,16 @@ Possible values: - 1 — Enabled. Default value: `0`. + +## allow_experimental_geo_types {#allow-experimental-geo-types} + +Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md). + +Possible values: + +- 0 — Working with geo data types is disabled. +- 1 — Working with geo data types is enabled. + +Default value: `0`. + +[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index c252458af8a..fa871d215b5 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -14,7 +14,7 @@ Columns: - `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds). +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). - `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). 
**Example** diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 5dc23aee686..e66f082167e 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -20,7 +20,7 @@ System tables: Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. -Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. +Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements can be customized are: @@ -33,7 +33,7 @@ System log tables can be customized by creating a config file with the same name An example: -``` +```xml system diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 04f9f3660b5..cfabf42bff1 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -91,6 +91,8 @@ $ clickhouse-local --query " Now let’s output memory user for each Unix user: +Query: + ``` bash $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ | clickhouse-local --structure "user String, mem Float64" \ @@ -98,6 +100,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` +Result: + ``` text Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. 
┏━━━━━━━━━━┳━━━━━━━━━━┓ diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 035bc91b9ed..c6c97b5428b 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -253,8 +253,8 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Parameters** -- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. -- `mode` - It is an optional parameter. +- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. +- `mode` - It is an optional argument. - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 7639117042f..72aa607a751 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -52,15 +52,15 @@ Input table: Query: ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; +SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary; ``` Result: ``` text -┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ -│ director │ ('director',5000) │ -└──────────────────────┴─────────────────────────────┘ +┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐ +│ director │ ('director',5000) │ ('director',5000) │ +└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘ ``` [Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index 12dc4ac1e9d..d53a47a36a3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -9,7 +9,7 @@ Calculates the arithmetic mean. **Syntax** ``` sql -avgWeighted(x) +avg(x) ``` **Arguments** diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md new file mode 100644 index 00000000000..9ed328e0de6 --- /dev/null +++ b/docs/en/sql-reference/data-types/geo.md @@ -0,0 +1,106 @@ +--- +toc_priority: 62 +toc_title: Geo +--- + +# Geo Data Types {#geo-data-types} + +Clickhouse supports data types for representing geographical objects — locations, lands, etc. + +!!! warning "Warning" + Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`. + +**See Also** +- [Representing simple geographical features](https://en.wikipedia.org/wiki/GeoJSON). +- [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types) setting. + +## Point {#point-data-type} + +`Point` is represented by its X and Y coordinates, stored as a [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)). 
+ +**Example** + +Query: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_point (p Point) ENGINE = Memory(); +INSERT INTO geo_point VALUES((10, 10)); +SELECT p, toTypeName(p) FROM geo_point; +``` +Result: + +``` text +┌─p─────┬─toTypeName(p)─┐ +│ (10,10) │ Point │ +└───────┴───────────────┘ +``` + +## Ring {#ring-data-type} + +`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)). + +**Example** + +Query: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_ring (r Ring) ENGINE = Memory(); +INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); +SELECT r, toTypeName(r) FROM geo_ring; +``` +Result: + +``` text +┌─r─────────────────────────────┬─toTypeName(r)─┐ +│ [(0,0),(10,0),(10,10),(0,10)] │ Ring │ +└───────────────────────────────┴───────────────┘ +``` + +## Polygon {#polygon-data-type} + +`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes. + +**Example** + +This is a polygon with one hole: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory(); +INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]); +SELECT pg, toTypeName(pg) FROM geo_polygon; +``` + +Result: + +``` text +┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐ +│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon │ +└───────────────────────────────────────────────────────────────┴────────────────┘ +``` + +## MultiPolygon {#multipolygon-data-type} + +`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)). + +**Example** + +This multipolygon consists of two separate polygons — the first one without holes, and the second with one hole: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory(); +INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]); +SELECT mpg, toTypeName(mpg) FROM geo_multipolygon; +``` +Result: + +``` text +┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐ +│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon │ +└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/data-types/geo/) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 2d2746f85d3..244779c5ca8 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -21,7 +21,11 @@ The following aggregate functions are supported: - [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) - [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) -Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. 
`SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. + +!!! note "Note" + Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. + + `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. **Parameters** @@ -31,11 +35,7 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way **Example** ``` sql -CREATE TABLE t -( - column1 SimpleAggregateFunction(sum, UInt64), - column2 SimpleAggregateFunction(any, String) -) ENGINE = ... +CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` [Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index f26e1bee6c9..c557d8f369c 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -61,40 +61,58 @@ int32samoa: 1546300800 Converts a date or date with time to a UInt16 number containing the year number (AD). +Alias: `YEAR`. + ## toQuarter {#toquarter} Converts a date or date with time to a UInt8 number containing the quarter number. +Alias: `QUARTER`. + ## toMonth {#tomonth} Converts a date or date with time to a UInt8 number containing the month number (1-12). +Alias: `MONTH`. + ## toDayOfYear {#todayofyear} Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). +Alias: `DAYOFYEAR`. + ## toDayOfMonth {#todayofmonth} Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +Aliases: `DAYOFMONTH`, `DAY`. + ## toDayOfWeek {#todayofweek} Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +Alias: `DAYOFWEEK`. + ## toHour {#tohour} Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time). +Alias: `HOUR`. + ## toMinute {#tominute} Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59). +Alias: `MINUTE`. + ## toSecond {#tosecond} Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Leap seconds are not accounted for. +Alias: `SECOND`. + ## toUnixTimestamp {#to-unix-timestamp} For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). @@ -753,7 +771,7 @@ This is necessary for searching for pageviews in the corresponding session. ## formatDateTime {#formatdatetime} -Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. +Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. 
**Syntax** @@ -812,31 +830,32 @@ Result: └────────────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) - ## FROM\_UNIXTIME {#fromunixfime} -When there is only single argument of integer type, it act in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). -type. +Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. -For example: +**Example:** + +Query: ```sql -SELECT FROM_UNIXTIME(423543535) +SELECT FROM_UNIXTIME(423543535); ``` +Result: + ```text ┌─FROM_UNIXTIME(423543535)─┐ │ 1983-06-04 10:58:55 │ └──────────────────────────┘ ``` -When there are two arguments, first is integer or DateTime, second is constant format string, it act in the same way as `formatDateTime` and return `String` type. +When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. For example: ```sql -SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime +SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; ``` ```text @@ -988,3 +1007,5 @@ Result: │ 2020-01-01 │ └────────────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 31e84c08b39..c1013ebb0e1 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -75,6 +75,8 @@ Result: Returns a string containing the argument’s hexadecimal representation. +Alias: `HEX`. + **Syntax** ``` sql diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 0dd7469b25e..df27685dcb3 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -55,7 +55,7 @@ CREATE TABLE encryption_test `comment` String, `secret` String ) -ENGINE = Memory +ENGINE = Memory; ``` Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes: @@ -110,7 +110,7 @@ Result: Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function. -Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`. +Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `iv`. 
Supported encryption modes: @@ -132,13 +132,12 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). - `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optinal, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). **Returned value** - Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). - **Examples** Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext: @@ -157,7 +156,6 @@ Result: └───────────────────┘ ``` - But `encrypt` fails when `key` or `iv` is longer than expected: Query: @@ -252,7 +250,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Examples** -Re-using table from [encrypt](./encryption-functions.md#encrypt). +Re-using table from [encrypt](#encrypt). Query: @@ -284,6 +282,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920 ``` Result: + ``` text ┌─comment─────────────────────────────┬─plaintext─┐ │ aes-256-cfb128 no IV │ Secret │ @@ -294,7 +293,7 @@ Result: └─────────────────────────────────────┴───────────┘ ``` -Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. +Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. ## aes_decrypt_mysql {#aes_decrypt_mysql} @@ -331,6 +330,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Examples** Let's decrypt data we've previously encrypted with MySQL: + ``` sql mysql> SET block_encryption_mode='aes-256-cfb128'; Query OK, 0 rows affected (0.00 sec) @@ -345,11 +345,13 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv ``` Query: + ``` sql SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext ``` Result: + ``` text ┌─plaintext─┐ │ Secret │ diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index df75e96c8fb..f57f0f7e27d 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -13,6 +13,8 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNull(x) ``` +Alias: `ISNULL`. + **Arguments** - `x` — A value with a non-compound data type. 
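A minimal sketch of `isNull` together with the `ISNULL` alias documented above; the result shown is the expected output for these constant arguments:

``` sql
SELECT isNull(NULL) AS a, ISNULL(1) AS b;

┌─a─┬─b─┐
│ 1 │ 0 │
└───┴───┘
```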
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 6bf1bebabaa..465ad01527f 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem ## halfMD5 {#hash-functions-halfmd5} -[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ``` sql halfMD5(par1, ...) @@ -54,7 +54,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. -Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: +Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: 1. After hashing all the input parameters, the function gets the array of hashes. 2. Function takes the first and the second elements and calculates a hash for the array of them. diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index eaea5e250fb..2209b042084 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -9,10 +9,14 @@ toc_title: IP Addresses Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form). +Alias: `INET_NTOA`. + ## IPv4StringToNum(s) {#ipv4stringtonums} The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0. +Alias: `INET_ATON`. + ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} Similar to IPv4NumToString, but using xxx instead of the last octet. @@ -49,7 +53,11 @@ Since using ‘xxx’ is highly unusual, this may be changed in the future. We r ### IPv6NumToString(x) {#ipv6numtostringx} Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format. -IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44. Examples: +IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44. + +Alias: `INET6_NTOA`. + +Examples: ``` sql SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr @@ -119,6 +127,8 @@ The reverse function of IPv6NumToString. 
If the IPv6 address has an invalid form If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. HEX can be uppercase or lowercase. +Alias: `INET6_ATON`. + ``` sql SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); ``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 191bd100dda..2c08fa3acb7 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -98,6 +98,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') Repeats a string as many times as specified and concatenates the replicated values as a single string. +Alias: `REPEAT`. + **Syntax** ``` sql @@ -276,10 +278,14 @@ Returns the string ‘s’ that was converted from the encoding in ‘from’ to Encodes ‘s’ string into base64 +Alias: `TO_BASE64`. + ## base64Decode(s) {#base64decode} Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception. +Alias: `FROM_BASE64`. + ## tryBase64Decode(s) {#trybase64decode} Similar to base64Decode, but in case of error an empty string would be returned. diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 2b3a9d9103f..1d4839cbbf9 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -174,4 +174,129 @@ Result: └──────────────────────────────┴───────────────────────────────────┘ ``` +## mapContains {#mapcontains} + +Determines whether the `map` contains the `key` parameter. + +**Syntax** + +``` sql +mapContains(map, key) +``` + +**Parameters** + +- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `key` — Key. Type matches the type of keys of `map` parameter. + +**Returned value** + +- `1` if `map` contains `key`, `0` if not. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapContains(a, 'name') FROM test; + +``` + +Result: + +```text +┌─mapContains(a, 'name')─┐ +│ 1 │ +│ 0 │ +└────────────────────────┘ +``` + +## mapKeys {#mapkeys} + +Returns all keys from the `map` parameter. + +**Syntax** + +```sql +mapKeys(map) +``` + +**Parameters** + +- `map` — Map. [Map](../../sql-reference/data-types/map.md). + +**Returned value** + +- Array containing all keys from the `map`. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapKeys(a) FROM test; +``` + +Result: + +```text +┌─mapKeys(a)────────────┐ +│ ['name','age'] │ +│ ['number','position'] │ +└───────────────────────┘ +``` + +## mapValues {#mapvalues} + +Returns all values from the `map` parameter. + +**Syntax** + +```sql +mapKeys(map) +``` + +**Parameters** + +- `map` — Map. [Map](../../sql-reference/data-types/map.md). + +**Returned value** + +- Array containing all the values from `map`. + +Type: [Array](../../sql-reference/data-types/array.md). 
+ +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapValues(a) FROM test; +``` + +Result: + +```text +┌─mapValues(a)─────┐ +│ ['eleven','11'] │ +│ ['twelve','6.0'] │ +└──────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 0ea2bf0f1a6..8a793b99ac9 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -36,10 +36,14 @@ The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/f **Example** +Query: + ``` sql -SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) +SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8); ``` +Result: + ``` text ┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ │ -9223372036854775808 │ 32 │ 16 │ 8 │ @@ -52,10 +56,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3 **Example** +Query: + ``` sql -select toInt64OrZero('123123'), toInt8OrZero('123qwe123') +SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123'); ``` +Result: + ``` text ┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ │ 123123 │ 0 │ @@ -68,10 +76,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3 **Example** +Query: + ``` sql -select toInt64OrNull('123123'), toInt8OrNull('123qwe123') +SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123'); ``` +Result: + ``` text ┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ │ 123123 │ ᴺᵁᴸᴸ │ @@ -102,10 +114,14 @@ The behavior of functions for negative agruments and for the [NaN and Inf](../.. **Example** +Query: + ``` sql -SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) +SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8); ``` +Result: + ``` text ┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ │ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ @@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) ## toDate {#todate} +Alias: `DATE`. + ## toDateOrZero {#todateorzero} ## toDateOrNull {#todateornull} @@ -168,20 +186,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains: **Examples** +Query: + ``` sql -SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val) +SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val); ``` +Result: + ``` text ┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ │ -1.11100 │ Nullable(Decimal(9, 5)) │ └──────────┴────────────────────────────────────────────────────┘ ``` +Query: + ``` sql -SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) +SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val); ``` +Result: + ``` text ┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ │ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ @@ -213,20 +239,28 @@ A value in the `Nullable(Decimal(P,S))` data type. 
The value contains: **Example** +Query: + ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val) +SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val); ``` +Result: + ``` text ┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ │ -1.11100 │ Decimal(9, 5) │ └──────────┴────────────────────────────────────────────────────┘ ``` +Query: + ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val) +SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val); ``` +Result: + ``` text ┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ │ 0.00 │ Decimal(9, 2) │ @@ -258,12 +292,18 @@ Conversion between numeric types uses the same rules as assignments between diff Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg` In this case, the time is formatted according to the specified time zone. +**Example** + +Query: + ``` sql SELECT now() AS now_local, - toString(now(), 'Asia/Yekaterinburg') AS now_yekat + toString(now(), 'Asia/Yekaterinburg') AS now_yekat; ``` +Result: + ``` text ┌───────────now_local─┬─now_yekat───────────┐ │ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │ @@ -281,36 +321,81 @@ If the string has fewer bytes than N, it is padded with null bytes to the right. Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found. -Example: +**Example** + +Query: ``` sql -SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut +SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut; ``` +Result: + ``` text ┌─s─────────────┬─s_cut─┐ │ foo\0\0\0\0\0 │ foo │ └───────────────┴───────┘ ``` +Query: + ``` sql -SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut +SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; ``` +Result: + ``` text ┌─s──────────┬─s_cut─┐ │ foo\0bar\0 │ foo │ └────────────┴───────┘ ``` +## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} + +## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} + +## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} + +## reinterpretAsDate {#reinterpretasdate} + +## reinterpretAsDateTime {#reinterpretasdatetime} + +These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. + +## reinterpretAsString {#type_conversion_functions-reinterpretAsString} + +This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. + +## reinterpretAsFixedString {#reinterpretasfixedstring} + +This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. 
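A short sketch of the byte-level behaviour described above: the low byte 0x41 reads back as the character 'A', and the trailing null bytes of the wider integer are dropped (illustrative query under those assumptions):

``` sql
SELECT
    reinterpretAsString(toUInt8(65))  AS one_byte,   -- 'A' (single byte 0x41)
    reinterpretAsString(toUInt32(65)) AS four_bytes; -- also 'A': trailing null bytes are dropped
```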
+ +## reinterpretAsUUID {#reinterpretasuuid} + +This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. + +**Syntax** + +``` sql +reinterpretAsUUID(fixed_string) +``` + +**Parameters** + +- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). + ## reinterpret(x, T) {#type_conversion_function-reinterpret} -Performs byte reinterpretation of ‘x’ as ‘t’ data type. +**Returned value** -Following reinterpretations are allowed: -1. Any type that has fixed size and value of that type can be represented continuously into FixedString. -2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. -3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, +- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). + +**Examples** + +String to UUID. + +Query: ``` sql SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, @@ -318,39 +403,45 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, reinterpret('1', 'UInt32') as string_to_int; ``` +Result: + ``` text -┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐ -│ 255 │ 1e-45 │ 49 │ -└─────────────┴──────────────┴───────────────┘ +┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ +│ 08090a0b-0c0d-0e0f-0001-020304050607 │ +└───────────────────────────────────────────────────────────────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256} +Going back and forth from String to UUID. -## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256} +Query: -## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256} +``` sql +WITH + generateUUIDv4() AS uuid, + identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, + reinterpretAsUUID(reverse(unhex(str))) AS uuid2 +SELECT uuid = uuid2; +``` -## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat} +Result: -## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} - -## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} - -## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64} - -## reinterpretAsString {#type_conversion_function-reinterpretAsString} - -## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString} - -## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID} - -These functions are aliases for `reinterpret` function. +``` text +┌─equals(uuid, uuid2)─┐ +│ 1 │ +└─────────────────────┘ +``` ## CAST(x, T) {#type_conversion_function-cast} -Converts ‘x’ to the ‘t’ data type. The syntax CAST(x AS t) is also supported. +Converts input value `x` to the `T` data type. -Example: +The syntax `CAST(x AS t)` is also supported. + +Note, that if value `x` does not fit the bounds of type T, the function overflows. For example, CAST(-1, 'UInt8') returns 255. 
+ +**Example** + +Query: ``` sql SELECT @@ -358,9 +449,11 @@ SELECT CAST(timestamp AS DateTime) AS datetime, CAST(timestamp AS Date) AS date, CAST(timestamp, 'String') AS string, - CAST(timestamp, 'FixedString(22)') AS fixed_string + CAST(timestamp, 'FixedString(22)') AS fixed_string; ``` +Result: + ``` text ┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐ │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │ @@ -369,12 +462,18 @@ SELECT Conversion to FixedString(N) only works for arguments of type String or FixedString(N). -Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. Example: +Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. + +**Example** + +Query: ``` sql -SELECT toTypeName(x) FROM t_null +SELECT toTypeName(x) FROM t_null; ``` +Result: + ``` text ┌─toTypeName(x)─┐ │ Int8 │ @@ -382,10 +481,14 @@ SELECT toTypeName(x) FROM t_null └───────────────┘ ``` +Query: + ``` sql -SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null +SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null; ``` +Result: + ``` text ┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐ │ Nullable(UInt16) │ @@ -399,15 +502,19 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null ## accurateCast(x, T) {#type_conversion_function-accurate-cast} -Converts ‘x’ to the ‘t’ data type. The differente from cast(x, T) is that accurateCast -does not allow overflow of numeric types during cast if type value x does not fit -bounds of type T. +Converts `x` to the `T` data type. + +The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception. + +**Example** + +Query: -Example ``` sql SELECT cast(-1, 'UInt8') as uint8; ``` +Result: ``` text ┌─uint8─┐ @@ -415,38 +522,46 @@ SELECT cast(-1, 'UInt8') as uint8; └───────┘ ``` +Query: + ```sql SELECT accurateCast(-1, 'UInt8') as uint8; ``` +Result: + ``` text Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8. - ``` ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} -Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL -if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type. -Example: +**Syntax** + +```sql +accurateCastOrNull(x, T) +``` + +**Parameters** + +- `x` — Input value. +- `T` — The name of the returned data type. + +**Returned value** + +- The value, converted to the specified data type `T`. 
+ +**Example** + +Query: ``` sql -SELECT - accurateCastOrNull(-1, 'UInt8') as uint8, - accurateCastOrNull(128, 'Int8') as int8, - accurateCastOrNull('Test', 'FixedString(2)') as fixed_string +SELECT toTypeName(accurateCastOrNull(5, 'UInt8')); ``` -``` text -┌─uint8─┬─int8─┬─fixed_string─┐ -│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -└───────┴──────┴──────────────┘┘ -``` - -``` sql -SELECT toTypeName(accurateCastOrNull(5, 'UInt8')) -``` +Result: ``` text ┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐ @@ -454,6 +569,23 @@ SELECT toTypeName(accurateCastOrNull(5, 'UInt8')) └────────────────────────────────────────────┘ ``` +Query: + +``` sql +SELECT + accurateCastOrNull(-1, 'UInt8') as uint8, + accurateCastOrNull(128, 'Int8') as int8, + accurateCastOrNull('Test', 'FixedString(2)') as fixed_string; +``` + +Result: + +``` text +┌─uint8─┬─int8─┬─fixed_string─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└───────┴──────┴──────────────┘ +``` + ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. @@ -481,6 +613,8 @@ toIntervalYear(number) **Example** +Query: + ``` sql WITH toDate('2019-01-01') AS date, @@ -488,9 +622,11 @@ WITH toIntervalWeek(1) AS interval_to_week SELECT date + interval_week, - date + interval_to_week + date + interval_to_week; ``` +Result: + ``` text ┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ │ 2019-01-08 │ 2019-01-08 │ @@ -506,7 +642,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112 **Syntax** ``` sql -parseDateTimeBestEffort(time_string [, time_zone]); +parseDateTimeBestEffort(time_string [, time_zone]) ``` **Arguments** @@ -549,7 +685,7 @@ Query: ``` sql SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Result: @@ -564,7 +700,7 @@ Query: ``` sql SELECT parseDateTimeBestEffort('1284101485') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Result: @@ -579,7 +715,7 @@ Query: ``` sql SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Result: @@ -593,7 +729,7 @@ Result: Query: ``` sql -SELECT parseDateTimeBestEffort('10 20:19') +SELECT parseDateTimeBestEffort('10 20:19'); ``` Result: @@ -613,12 +749,12 @@ Result: ## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS} -This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity. +This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity. **Syntax** ``` sql -parseDateTimeBestEffortUS(time_string [, time_zone]); +parseDateTimeBestEffortUS(time_string [, time_zone]) ``` **Arguments** @@ -693,6 +829,178 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. +## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull} + +Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed. 
+ +**Syntax** + +``` sql +parseDateTimeBestEffortUSOrNull(time_string[, time_zone]) +``` + +**Parameters** + +- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md). +- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). + +**Supported non-standard formats** + +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`. +- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. + +**Returned values** + +- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `NULL` if the input string cannot be converted to the `DateTime` data type. + +**Examples** + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-10 21:12:57 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-11 00:12:57 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-10 00:00:00 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ ᴺᵁᴸᴸ │ +└─────────────────────────────────┘ +``` + +## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero} + +Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed. + +**Syntax** + +``` sql +parseDateTimeBestEffortUSOrZero(time_string[, time_zone]) +``` + +**Parameters** + +- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md). +- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). + +**Supported non-standard formats** + +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. 
+- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`. +- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. + +**Returned values** + +- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type. +- Zero date or zero date with time if the input string cannot be converted to the `DateTime` data type. + +**Examples** + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-10 21:12:57 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-11 00:12:57 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-10 00:00:00 │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 1970-01-01 00:00:00 │ +└─────────────────────────────────┘ +``` + ## toLowCardinality {#tolowcardinality} Converts input parameter to the [LowCardianlity](../../sql-reference/data-types/lowcardinality.md) version of same data type. @@ -720,7 +1028,7 @@ Type: `LowCardinality(expr_result_type)` Query: ``` sql -SELECT toLowCardinality('1') +SELECT toLowCardinality('1'); ``` Result: @@ -759,7 +1067,7 @@ Query: ``` sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 -SELECT toUnixTimestamp64Milli(dt64) +SELECT toUnixTimestamp64Milli(dt64); ``` Result: @@ -772,7 +1080,7 @@ Result: ``` sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 -SELECT toUnixTimestamp64Nano(dt64) +SELECT toUnixTimestamp64Nano(dt64); ``` Result: @@ -806,13 +1114,17 @@ fromUnixTimestamp64Milli(value [, ti]) - `value` converted to the `DateTime64` data type. -**Examples** +**Example** + +Query: ``` sql WITH CAST(1234567891011, 'Int64') AS i64 -SELECT fromUnixTimestamp64Milli(i64, 'UTC') +SELECT fromUnixTimestamp64Milli(i64, 'UTC'); ``` +Result: + ``` text ┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ │ 2009-02-13 23:31:31.011 │ @@ -844,7 +1156,7 @@ Query: ``` sql SELECT formatRow('CSV', number, 'good') -FROM numbers(3) +FROM numbers(3); ``` Result: @@ -885,7 +1197,7 @@ Query: ``` sql SELECT formatRowNoNewline('CSV', number, 'good') -FROM numbers(3) +FROM numbers(3); ``` Result: diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index bfa8b3d1003..34866f3d09a 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ... If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query. -Don’t list too many values explicitly (i.e. millions). 
If a data set is large, put it in a temporary table (for example, see the section “External data for query processing”), then use a subquery.
+Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.

The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets.

+ClickHouse allows types to differ in the left and the right parts of the `IN` subquery. In this case it converts the left-side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function were applied. That means that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT '1' IN (SELECT 1);
+```
+
+Result:
+
+``` text
+┌─in('1', _subquery49)─┐
+│                    1 │
+└──────────────────────┘
+```
+
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the ‘users’ temporary table, which should be filtered.

If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query.
diff --git a/docs/en/sql-reference/statements/alter/ttl.md b/docs/en/sql-reference/statements/alter/ttl.md
index 5331afdb2f8..e8bfb78ec68 100644
--- a/docs/en/sql-reference/statements/alter/ttl.md
+++ b/docs/en/sql-reference/statements/alter/ttl.md
@@ -81,5 +81,5 @@ The `TTL` is no longer there, so the second row is not deleted:

### See Also

-- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression).
-- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column).
+- More about the [TTL-expression](../../../../sql-reference/statements/create/table#ttl-expression).
+- Modify column [with TTL](../../../../sql-reference/statements/alter/column#alter_modify-column).
diff --git a/docs/en/sql-reference/statements/select/all.md b/docs/en/sql-reference/statements/select/all.md
index 5e0de4c142b..891b82c4319 100644
--- a/docs/en/sql-reference/statements/select/all.md
+++ b/docs/en/sql-reference/statements/select/all.md
@@ -4,10 +4,8 @@ toc_title: ALL

# ALL Clause {#select-all}

-`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
+If there are multiple matching rows in the table, then `ALL` returns all of them. `SELECT ALL` is identical to `SELECT` without `DISTINCT`. If both `ALL` and `DISTINCT` are specified, an exception will be thrown.

-- If `ALL` specified, ignore it.
-- If both `ALL` and `DISTINCT` specified, exception will be thrown.
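To make the `ALL`/`DISTINCT` contrast above concrete, here is a minimal sketch; the `colors` table, its engine and its values are hypothetical and serve only as an illustration:

``` sql
-- Hypothetical table used only to illustrate ALL vs DISTINCT.
CREATE TABLE colors (c String) ENGINE = Memory;
INSERT INTO colors VALUES ('red'), ('red'), ('blue');

SELECT ALL c FROM colors;       -- keeps duplicates: 3 rows
SELECT DISTINCT c FROM colors;  -- removes duplicates: 2 rows
-- SELECT ALL DISTINCT c FROM colors;  -- combining both is expected to throw an exception
```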
`ALL` can also be specified inside aggregate function with the same effect(noop), for instance: @@ -19,3 +17,5 @@ equals to ```sql SELECT sum(number) FROM numbers(10); ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/all) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index d1eb81e52c6..da0999e66eb 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -5,7 +5,7 @@ toc_title: file # file {#file} -Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. +Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. `file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables. @@ -15,9 +15,9 @@ Creates a table from a file. This table function is similar to [url](../../sql-r file(path, format, structure) ``` -**Input parameters** +**Parameters** -- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. @@ -39,7 +39,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv 78,43,45 ``` -Getting data from a table in `test.csv` and selecting first two rows from it: +Getting data from a table in `test.csv` and selecting the first two rows from it: ``` sql SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; @@ -51,7 +51,8 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U │ 3 │ 2 │ 1 │ └─────────┴─────────┴─────────┘ ``` -Getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file: + +Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file: ``` sql SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; @@ -71,17 +72,16 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U └─────────┴─────────┴─────────┘ ``` - ## Globs in Path {#globs-in-path} -Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). +Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. 
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. -Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). +Constructions with `{}` are similar to the [remote](remote.md) table function. **Example** @@ -94,13 +94,13 @@ Suppose we have several files with the following relative paths: - 'another_dir/some_file_2' - 'another_dir/some_file_3' -Query the amount of rows in these files: +Query the number of rows in these files: ``` sql SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32'); ``` -Query the amount of rows in all files of these two directories: +Query the number of rows in all files of these two directories: ``` sql SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); @@ -124,6 +124,6 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, **See Also** -- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) +- [Virtual columns](index.md#table_engines-virtual_columns) [Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 14cd4369285..7b4e2a301b3 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -44,7 +44,7 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C A table object with the same columns as the original MySQL table. !!! info "Note" - In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. + In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below. **Examples** diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 8af5b588412..e80e58a76aa 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -5,7 +5,7 @@ toc_title: remote # remote, remoteSecure {#remote-remotesecure} -Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with secured connection. +Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with a secured connection. Both functions can be used in `SELECT` and `INSERT` queries. @@ -18,31 +18,31 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) ``` -**Input parameters** +**Parameters** -- `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. +- `addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. 
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. - The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). + The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). The port is required for an IPv6 address. Type: [String](../../sql-reference/data-types/string.md). -- `db` - Database name. Type: [String](../../sql-reference/data-types/string.md). -- `table` - Table name. Type: [String](../../sql-reference/data-types/string.md). -- `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md). -- `password` - User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). -- `sharding_key` - Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md). +- `table` — Table name. Type: [String](../../sql-reference/data-types/string.md). +- `user` — User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md). +- `password` — User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). +- `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Returned value** -Dataset from remote servers. +The dataset from remote servers. **Usage** -Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `remote` table function. +Using the `remote` table function is less optimal than creating a `Distributed` table because in this case the server connection is re-established for every request. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `remote` table function. The `remote` table function can be useful in the following cases: @@ -62,7 +62,7 @@ localhost [2a02:6b8:0:1111::11]:9000 ``` -Multiple addresses can be comma-separated. 
In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). Example: +Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like shards with different data). Example: ``` text example01-01-1,example01-02-1 @@ -82,7 +82,7 @@ example01-{01..02}-1 If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets. -Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. This example specifies two shards that each have two replicas: +Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. This example specifies two shards that each have two replicas: ``` text example01-{01..02}-{1|2} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index d70774b7588..63b0ff0e152 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -15,25 +15,25 @@ toc_title: url url(URL, format, structure) ``` -**Input parameters** +**Parameters** -- `URL` - HTTP or HTTPS server address, which can accept `GET` (for `SELECT`) or `POST` (for `INSERT`) requests. Type: [String](../../sql-reference/data-types/string.md). -- `format` - [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). -- `structure` - Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). +- `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md). +- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). +- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). **Returned value** -A table with the specified format and structure and with data from the defined URL. +A table with the specified format and structure and with data from the defined `URL`. **Examples** -Getting the first 3 lines of a table that contains columns of `String` and `UInt32` type from HTTP-server which answers in `CSV` format. +Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from HTTP-server which answers in [CSV](../../interfaces/formats.md/#csv) format. 
``` sql SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; ``` -Inserting data from a URL into a table: +Inserting data from a `URL` into a table: ``` sql CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory; diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 940fee2452b..5a6971b1ae6 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -31,21 +31,26 @@ SETTINGS [kafka_schema = '',] [kafka_num_consumers = N,] [kafka_skip_broken_messages = N] + [kafka_commit_every_batch = 0,] + [kafka_thread_per_consumer = 0] ``` Обязательные параметры: -- `kafka_broker_list` – перечень брокеров, разделенный запятыми (`localhost:9092`). -- `kafka_topic_list` – перечень необходимых топиков Kafka. -- `kafka_group_name` – группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. -- `kafka_format` – формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md). +- `kafka_broker_list` — перечень брокеров, разделенный запятыми (`localhost:9092`). +- `kafka_topic_list` — перечень необходимых топиков Kafka. +- `kafka_group_name` — группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. +- `kafka_format` — формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md). Опциональные параметры: -- `kafka_row_delimiter` – символ-разделитель записей (строк), которым завершается сообщение. -- `kafka_schema` – опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. -- `kafka_num_consumers` – количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. -- `kafka_skip_broken_messages` – максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. +- `kafka_row_delimiter` — символ-разделитель записей (строк), которым завершается сообщение. +- `kafka_schema` — опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. +- `kafka_num_consumers` — количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. 
+- `kafka_max_block_size` — максимальный размер пачек (в сообщениях) для poll (по умолчанию `max_block_size`). +- `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. +- `kafka_commit_every_batch` — включает или отключает режим записи каждой принятой и обработанной пачки по отдельности вместо единой записи целого блока (по умолчанию `0`). +- `kafka_thread_per_consumer` — включает или отключает предоставление отдельного потока каждому потребителю (по умолчанию `0`). При включенном режиме каждый потребитель сбрасывает данные независимо и параллельно, при отключённом — строки с данными от нескольких потребителей собираются в один блок. Примеры diff --git a/docs/ru/engines/table-engines/special/file.md b/docs/ru/engines/table-engines/special/file.md index 6a55ef31732..9be09fd33e6 100644 --- a/docs/ru/engines/table-engines/special/file.md +++ b/docs/ru/engines/table-engines/special/file.md @@ -63,7 +63,7 @@ SELECT * FROM file_engine_table ## Использование движка в Clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local} -В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. +В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. Можно записывать и читать сжатые файлы. Для этого нужно задать дополнительный параметр движка или расширение файла (`gz`, `br` или `xz`). **Пример:** diff --git a/docs/ru/getting-started/example-datasets/brown-benchmark.md b/docs/ru/getting-started/example-datasets/brown-benchmark.md new file mode 100644 index 00000000000..23702e07fcd --- /dev/null +++ b/docs/ru/getting-started/example-datasets/brown-benchmark.md @@ -0,0 +1,416 @@ +--- +toc_priority: 20 +toc_title: Brown University Benchmark +--- + +# Brown University Benchmark + +`MgBench` — это аналитический тест производительности для данных журнала событий, сгенерированных машиной. Бенчмарк разработан [Andrew Crotty](http://cs.brown.edu/people/acrotty/). 
+ +Скачать данные: +``` +wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz +``` + +Распаковать данные: +``` +xz -v -d mgbench{1..3}.csv.xz +``` + +Создание таблиц: +``` +CREATE DATABASE mgbench; + + +CREATE TABLE mgbench.logs1 ( + log_time DateTime, + machine_name LowCardinality(String), + machine_group LowCardinality(String), + cpu_idle Nullable(Float32), + cpu_nice Nullable(Float32), + cpu_system Nullable(Float32), + cpu_user Nullable(Float32), + cpu_wio Nullable(Float32), + disk_free Nullable(Float32), + disk_total Nullable(Float32), + part_max_used Nullable(Float32), + load_fifteen Nullable(Float32), + load_five Nullable(Float32), + load_one Nullable(Float32), + mem_buffers Nullable(Float32), + mem_cached Nullable(Float32), + mem_free Nullable(Float32), + mem_shared Nullable(Float32), + swap_free Nullable(Float32), + bytes_in Nullable(Float32), + bytes_out Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (machine_group, machine_name, log_time); + + +CREATE TABLE mgbench.logs2 ( + log_time DateTime, + client_ip IPv4, + request String, + status_code UInt16, + object_size UInt64 +) +ENGINE = MergeTree() +ORDER BY log_time; + + +CREATE TABLE mgbench.logs3 ( + log_time DateTime64, + device_id FixedString(15), + device_name LowCardinality(String), + device_type LowCardinality(String), + device_floor UInt8, + event_type LowCardinality(String), + event_unit FixedString(1), + event_value Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (event_type, log_time); +``` + +Вставка данных: + +``` +clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv +clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv +clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv +``` + +Запуск тестов производительности: +``` +-- Q1.1: What is the CPU/network utilization for each web server since midnight? + +SELECT machine_name, + MIN(cpu) AS cpu_min, + MAX(cpu) AS cpu_max, + AVG(cpu) AS cpu_avg, + MIN(net_in) AS net_in_min, + MAX(net_in) AS net_in_max, + AVG(net_in) AS net_in_avg, + MIN(net_out) AS net_out_min, + MAX(net_out) AS net_out_max, + AVG(net_out) AS net_out_avg +FROM ( + SELECT machine_name, + COALESCE(cpu_user, 0.0) AS cpu, + COALESCE(bytes_in, 0.0) AS net_in, + COALESCE(bytes_out, 0.0) AS net_out + FROM logs1 + WHERE machine_name IN ('anansi','aragog','urd') + AND log_time >= TIMESTAMP '2017-01-11 00:00:00' +) AS r +GROUP BY machine_name; + + +-- Q1.2: Which computer lab machines have been offline in the past day? + +SELECT machine_name, + log_time +FROM logs1 +WHERE (machine_name LIKE 'cslab%' OR + machine_name LIKE 'mslab%') + AND load_one IS NULL + AND log_time >= TIMESTAMP '2017-01-10 00:00:00' +ORDER BY machine_name, + log_time; + + +-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation? 
+ +SELECT dt, + hr, + AVG(load_fifteen) AS load_fifteen_avg, + AVG(load_five) AS load_five_avg, + AVG(load_one) AS load_one_avg, + AVG(mem_free) AS mem_free_avg, + AVG(swap_free) AS swap_free_avg +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + load_fifteen, + load_five, + load_one, + mem_free, + swap_free + FROM logs1 + WHERE machine_name = 'babbage' + AND load_fifteen IS NOT NULL + AND load_five IS NOT NULL + AND load_one IS NOT NULL + AND mem_free IS NOT NULL + AND swap_free IS NOT NULL + AND log_time >= TIMESTAMP '2017-01-01 00:00:00' +) AS r +GROUP BY dt, + hr +ORDER BY dt, + hr; + + +-- Q1.4: Over 1 month, how often was each server blocked on disk I/O? + +SELECT machine_name, + COUNT(*) AS spikes +FROM logs1 +WHERE machine_group = 'Servers' + AND cpu_wio > 0.99 + AND log_time >= TIMESTAMP '2016-12-01 00:00:00' + AND log_time < TIMESTAMP '2017-01-01 00:00:00' +GROUP BY machine_name +ORDER BY spikes DESC +LIMIT 10; + + +-- Q1.5: Which externally reachable VMs have run low on memory? + +SELECT machine_name, + dt, + MIN(mem_free) AS mem_free_min +FROM ( + SELECT machine_name, + CAST(log_time AS DATE) AS dt, + mem_free + FROM logs1 + WHERE machine_group = 'DMZ' + AND mem_free IS NOT NULL +) AS r +GROUP BY machine_name, + dt +HAVING MIN(mem_free) < 10000 +ORDER BY machine_name, + dt; + + +-- Q1.6: What is the total hourly network traffic across all file servers? + +SELECT dt, + hr, + SUM(net_in) AS net_in_sum, + SUM(net_out) AS net_out_sum, + SUM(net_in) + SUM(net_out) AS both_sum +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, + COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out + FROM logs1 + WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', + 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', + 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', + 'poprocks','razzles','runts','smarties','smuggler','spree','stride', + 'tootsie','trident','wrigley','york') +) AS r +GROUP BY dt, + hr +ORDER BY both_sum DESC +LIMIT 10; + + +-- Q2.1: Which requests have caused server errors within the past 2 weeks? + +SELECT * +FROM logs2 +WHERE status_code >= 500 + AND log_time >= TIMESTAMP '2012-12-18 00:00:00' +ORDER BY log_time; + + +-- Q2.2: During a specific 2-week period, was the user password file leaked? + +SELECT * +FROM logs2 +WHERE status_code >= 200 + AND status_code < 300 + AND request LIKE '%/etc/passwd%' + AND log_time >= TIMESTAMP '2012-05-06 00:00:00' + AND log_time < TIMESTAMP '2012-05-20 00:00:00'; + + +-- Q2.3: What was the average path depth for top-level requests in the past month? + +SELECT top_level, + AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg +FROM ( + SELECT SUBSTRING(request FROM 1 FOR len) AS top_level, + request + FROM ( + SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, + request + FROM logs2 + WHERE status_code >= 200 + AND status_code < 300 + AND log_time >= TIMESTAMP '2012-12-01 00:00:00' + ) AS r + WHERE len > 0 +) AS s +WHERE top_level IN ('/about','/courses','/degrees','/events', + '/grad','/industry','/news','/people', + '/publications','/research','/teaching','/ugrad') +GROUP BY top_level +ORDER BY top_level; + + +-- Q2.4: During the last 3 months, which clients have made an excessive number of requests? 
+ +SELECT client_ip, + COUNT(*) AS num_requests +FROM logs2 +WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' +GROUP BY client_ip +HAVING COUNT(*) >= 100000 +ORDER BY num_requests DESC; + + +-- Q2.5: What are the daily unique visitors? + +SELECT dt, + COUNT(DISTINCT client_ip) +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + client_ip + FROM logs2 +) AS r +GROUP BY dt +ORDER BY dt; + + +-- Q2.6: What are the average and maximum data transfer rates (Gbps)? + +SELECT AVG(transfer) / 125000000.0 AS transfer_avg, + MAX(transfer) / 125000000.0 AS transfer_max +FROM ( + SELECT log_time, + SUM(object_size) AS transfer + FROM logs2 + GROUP BY log_time +) AS r; + + +-- Q3.1: Did the indoor temperature reach freezing over the weekend? + +SELECT * +FROM logs3 +WHERE event_type = 'temperature' + AND event_value <= 32.0 + AND log_time >= '2019-11-29 17:00:00.000'; + + +-- Q3.4: Over the past 6 months, how frequently were each door opened? + +SELECT device_name, + device_floor, + COUNT(*) AS ct +FROM logs3 +WHERE event_type = 'door_open' + AND log_time >= '2019-06-01 00:00:00.000' +GROUP BY device_name, + device_floor +ORDER BY ct DESC; + + +-- Q3.5: Where in the building do large temperature variations occur in winter and summer? + +WITH temperature AS ( + SELECT dt, + device_name, + device_type, + device_floor + FROM ( + SELECT dt, + hr, + device_name, + device_type, + device_floor, + AVG(event_value) AS temperature_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + device_name, + device_type, + device_floor, + event_value + FROM logs3 + WHERE event_type = 'temperature' + ) AS r + GROUP BY dt, + hr, + device_name, + device_type, + device_floor + ) AS s + GROUP BY dt, + device_name, + device_type, + device_floor + HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0 +) +SELECT DISTINCT device_name, + device_type, + device_floor, + 'WINTER' +FROM temperature +WHERE dt >= DATE '2018-12-01' + AND dt < DATE '2019-03-01' +UNION +SELECT DISTINCT device_name, + device_type, + device_floor, + 'SUMMER' +FROM temperature +WHERE dt >= DATE '2019-06-01' + AND dt < DATE '2019-09-01'; + + +-- Q3.6: For each device category, what are the monthly power consumption metrics? 
+ +SELECT yr, + mo, + SUM(coffee_hourly_avg) AS coffee_monthly_sum, + AVG(coffee_hourly_avg) AS coffee_monthly_avg, + SUM(printer_hourly_avg) AS printer_monthly_sum, + AVG(printer_hourly_avg) AS printer_monthly_avg, + SUM(projector_hourly_avg) AS projector_monthly_sum, + AVG(projector_hourly_avg) AS projector_monthly_avg, + SUM(vending_hourly_avg) AS vending_monthly_sum, + AVG(vending_hourly_avg) AS vending_monthly_avg +FROM ( + SELECT dt, + yr, + mo, + hr, + AVG(coffee) AS coffee_hourly_avg, + AVG(printer) AS printer_hourly_avg, + AVG(projector) AS projector_hourly_avg, + AVG(vending) AS vending_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(YEAR FROM log_time) AS yr, + EXTRACT(MONTH FROM log_time) AS mo, + EXTRACT(HOUR FROM log_time) AS hr, + CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee, + CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, + CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, + CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending + FROM logs3 + WHERE device_type = 'meter' + ) AS r + GROUP BY dt, + yr, + mo, + hr +) AS s +GROUP BY yr, + mo +ORDER BY yr, + mo; +``` + +Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.tech/play?user=play), [пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). + +[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets/brown-benchmark/) diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 096fb6c05bc..d6f930f3f63 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -149,28 +149,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor` (устанавливается вместе с пакетом `clickhouse-client`). Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки[http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress). -Если вы указали `compress = 1` в URL, то сервер сжимает данные, которые он отправляет. -Если вы указали `decompress = 1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`. 
+## Сжатие {#compression} -Также, можно использовать [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, добавьте заголовок `Accept-Encoding: compression_method`. ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): `gzip`, `br`, and `deflate`. Чтобы включить HTTP compression, используйте настройку ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression). Уровень сжатия данных для всех методов сжатия можно настроить с помощью настройки [http_zlib_compression_level](#settings-http_zlib_compression_level). +Сжатие можно использовать для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов. -Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов. +Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor`. Она устанавливается вместе с пакетом `clickhouse-client`. Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress). -Примеры отправки данных со сжатием: +Если вы указали `compress=1` в URL, то сервер сжимает данные, которые он отправляет. Если вы указали `decompress=1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`. -``` bash -$ #Отправка данных на сервер: -$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip' +Также можно использовать [сжатие HTTP](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): -$ #Отправка данных клиенту: -$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' -``` +- `gzip` +- `br` +- `deflate` +- `xz` + +Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. +Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level). !!! note "Примечание" Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. 
+**Примеры** + +``` bash +# Отправка сжатых данных на сервер +$ echo "SELECT 1" | gzip -c | \ + curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' +``` + +``` bash +# Получение сжатых данных с сервера +$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \ + -H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3' +$ zcat result.gz +0 +1 +2 +``` + +## База данных по умолчанию {#default-database} + Вы можете использовать параметр URL `database` или заголовок `X-ClickHouse-Database`, чтобы указать БД по умолчанию. ``` bash diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 26e05b02509..97fa382fdd9 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -22,6 +22,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) - Go - [clickhouse](https://github.com/kshvakov/clickhouse/) - [go-clickhouse](https://github.com/roistat/go-clickhouse) diff --git a/docs/ru/operations/caches.md b/docs/ru/operations/caches.md new file mode 100644 index 00000000000..7744c596cd9 --- /dev/null +++ b/docs/ru/operations/caches.md @@ -0,0 +1,29 @@ +--- +toc_priority: 65 +toc_title: Кеши +--- + +# Типы кеша {#cache-types} + +При выполнении запросов ClickHouse использует различные типы кеша. + +Основные типы кеша: + +- `mark_cache` — кеш засечек, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md). +- `uncompressed_cache` — кеш несжатых данных, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md). + +Дополнительные типы кеша: + +- DNS-кеш. +- Кеш данных формата [regexp](../interfaces/formats.md#data-format-regexp). +- Кеш скомпилированных выражений. +- Кеш схем формата [Avro](../interfaces/formats.md#data-format-avro). +- Кеш данных в [словарях](../sql-reference/dictionaries/index.md). + +Непрямое использование: + +- Кеш страницы ОС. + +Чтобы очистить кеш, используйте выражение [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md). + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/caches/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 7322b6c9184..f8f587c8a36 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1937,6 +1937,21 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; Значение по умолчанию: 16. +## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} + +Задает количество потоков для фонового потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. + +Допустимые значения: + +- Положительное целое число. + +Значение по умолчанию: 16. + +**Смотрите также** + +- Движок [Kafka](../../engines/table-engines/integrations/kafka.md#kafka). +- Движок [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine). 
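The new setting above can be inspected at runtime; a small sketch, assuming the setting is exposed through `system.settings` as in stock ClickHouse (it is applied at server startup and cannot be changed per session):

``` sql
-- Check the pool size currently in effect for background message streaming.
SELECT name, value
FROM system.settings
WHERE name = 'background_message_broker_schedule_pool_size';
```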
+ ## format_avro_schema_registry_url {#format_avro_schema_registry_url} Задает URL реестра схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html) для использования с форматом [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent). @@ -2537,4 +2552,15 @@ SELECT * FROM test2; Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md). +## allow_experimental_geo_types {#allow-experimental-geo-types} + +Разрешает использование экспериментальных типов данных для работы с [географическими структурами](../../sql-reference/data-types/geo.md). + +Возможные значения: + +- 0 — Использование типов данных для работы с географическими структурами не поддерживается. +- 1 — Использование типов данных для работы с географическими структурами поддерживается. + +Значение по умолчанию: `0`. + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/distributed_ddl_queue.md b/docs/ru/operations/system-tables/distributed_ddl_queue.md index 058ed06f639..71be69e98d7 100644 --- a/docs/ru/operations/system-tables/distributed_ddl_queue.md +++ b/docs/ru/operations/system-tables/distributed_ddl_queue.md @@ -14,7 +14,7 @@ - `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса. - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах). +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — продолжительность выполнения запроса (в миллисекундах). - `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper). **Пример** diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 93ea1c92068..cdea6102a81 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -9,25 +9,54 @@ toc_title: "\u0421\u0438\u0441\u0442\u0435\u043c\u043d\u044b\u0435\u0020\u0442\u Системные таблицы содержат информацию о: -- Состоянии сервера, процессов и окружении. -- Внутренних процессах сервера. +- состоянии сервера, процессов и окружении. +- внутренних процессах сервера. Системные таблицы: -- Находятся в базе данных `system`. -- Доступны только для чтения данных. -- Не могут быть удалены или изменены, но их можно отсоединить. +- находятся в базе данных `system`. +- доступны только для чтения данных. +- не могут быть удалены или изменены, но их можно отсоединить. -Системные таблицы `metric_log`, `query_log`, `query_thread_log`, `trace_log` системные таблицы хранят данные в файловой системе. Остальные системные таблицы хранят свои данные в оперативной памяти. Сервер ClickHouse создает такие системные таблицы при запуске. +Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте. 
+
+В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую.
+
+Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`. Настраиваться могут следующие элементы:
+
+- `database` — база данных, к которой принадлежит системная таблица. Эта опция на текущий момент устарела. Все системные таблицы находятся в базе данных `system`.
+- `table` — таблица для добавления данных.
+- `partition_by` — [ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md).
+- `ttl` — [время жизни](../../sql-reference/statements/alter/ttl.md) таблицы.
+- `flush_interval_milliseconds` — интервал сброса данных на диск, в миллисекундах.
+- `engine` — полное имя движка (начиная с `ENGINE =` ) с параметрами. Эта опция противоречит `partition_by` и `ttl`. Если указать оба параметра вместе, сервер вернет ошибку и завершит работу.
+
+Пример:
+
+```xml
+<yandex>
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </query_log>
+</yandex>
+``` + +По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`. ### Источники системных показателей Для сбора системных показателей сервер ClickHouse использует: -- Возможности `CAP_NET_ADMIN`. +- возможности `CAP_NET_ADMIN`. - [procfs](https://ru.wikipedia.org/wiki/Procfs) (только Linux). -**procfs** Если для сервера ClickHouse не включено `CAP_NET_ADMIN`, он пытается обратиться к `ProcfsMetricsProvider`. `ProcfsMetricsProvider` позволяет собирать системные показатели для каждого запроса (для CPU и I/O). diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 2b5c9b119e2..15d069c9acf 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -21,7 +21,8 @@ toc_title: clickhouse-local Основной формат вызова: ``` bash -$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" +$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \ + --query "query" ``` Ключи команды: @@ -76,7 +77,9 @@ $ clickhouse-local --query " 1 2 ``` -А теперь давайте выведем на экран объём оперативной памяти, занимаемой пользователями (Unix): +Объём оперативной памяти, занимаемой процессами, которые запустил пользователь (Unix): + +Запрос: ``` bash $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ @@ -85,6 +88,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` +Результат: + ``` text Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ┏━━━━━━━━━━┳━━━━━━━━━━┓ diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index f20acaa45c3..d96f7a13bcc 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -239,7 +239,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Параметры** -- `window` — ширина скользящего окна по времени в секундах. [UInt](../../sql-reference/aggregate-functions/parametric-functions.md). +- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Должно соблюдаться условие `timestamp события cond2 <= timestamp события cond1 + window`. - `mode` - необязательный параметр. Если установлено значение `'strict'`, то функция `windowFunnel()` применяет условия только для уникальных значений. - `timestamp` — имя столбца, содержащего временные отметки. [Date](../../sql-reference/aggregate-functions/parametric-functions.md), [DateTime](../../sql-reference/aggregate-functions/parametric-functions.md#data_type-datetime) и другие параметры с типом `Integer`. В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`. - `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../sql-reference/aggregate-functions/parametric-functions.md). 
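To make the relationship between `window`, `timestamp` and the `cond` chain described above concrete, here is a self-contained sketch; the `funnel_events` table, its columns and its values are hypothetical:

``` sql
-- Hypothetical funnel: visit -> cart -> purchase, where each step must happen
-- within 3600 seconds of the first matched event.
CREATE TABLE funnel_events (user_id UInt64, ts DateTime, event String) ENGINE = Memory;

INSERT INTO funnel_events VALUES
    (1, '2021-01-01 00:00:00', 'visit'),
    (1, '2021-01-01 00:05:00', 'cart'),
    (1, '2021-01-01 00:20:00', 'purchase'),
    (2, '2021-01-01 00:00:00', 'visit'),
    (2, '2021-01-01 02:00:00', 'cart');

SELECT
    user_id,
    windowFunnel(3600)(ts, event = 'visit', event = 'cart', event = 'purchase') AS level
FROM funnel_events
GROUP BY user_id
ORDER BY user_id;
-- Expected: user 1 reaches level 3; for user 2 the cart event falls outside the window, so level is 1.
```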
diff --git a/docs/ru/sql-reference/data-types/geo.md b/docs/ru/sql-reference/data-types/geo.md new file mode 100644 index 00000000000..23293b30927 --- /dev/null +++ b/docs/ru/sql-reference/data-types/geo.md @@ -0,0 +1,106 @@ +--- +toc_priority: 62 +toc_title: Географические структуры +--- + +# Типы данных для работы с географическими структурами {#geo-data-types} + +ClickHouse поддерживает типы данных для отображения географических объектов — точек (местоположений), территорий и т.п. + +!!! warning "Предупреждение" + Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`. + +**См. также** +- [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON). +- Настройка [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types). + +## Point {#point-data-type} + +Тип `Point` (точка) определяется парой координат X и Y и хранится в виде кортежа [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)). + +**Пример** + +Запрос: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_point (p Point) ENGINE = Memory(); +INSERT INTO geo_point VALUES((10, 10)); +SELECT p, toTypeName(p) FROM geo_point; +``` +Результат: + +``` text +┌─p─────┬─toTypeName(p)─┐ +│ (10,10) │ Point │ +└───────┴───────────────┘ +``` + +## Ring {#ring-data-type} + +Тип `Ring` описывает простой многоугольник без внутренних областей (дыр) и хранится в виде массива точек: [Array](array.md)([Point](#point-data-type)). + +**Пример** + +Запрос: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_ring (r Ring) ENGINE = Memory(); +INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); +SELECT r, toTypeName(r) FROM geo_ring; +``` +Результат: + +``` text +┌─r─────────────────────────────┬─toTypeName(r)─┐ +│ [(0,0),(10,0),(10,10),(0,10)] │ Ring │ +└───────────────────────────────┴───────────────┘ +``` + +## Polygon {#polygon-data-type} + +Тип `Polygon` описывает многоугольник с внутренними областями (дырами) и хранится в виде массива: [Array](array.md)([Ring](#ring-data-type)). Первый элемент массива описывает внешний многоугольник (контур), а остальные элементы описывают дыры. + +**Пример** + +Запись в этой таблице описывает многоугольник с одной дырой: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory(); +INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]); +SELECT pg, toTypeName(pg) FROM geo_polygon; +``` + +Результат: + +``` text +┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐ +│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon │ +└───────────────────────────────────────────────────────────────┴────────────────┘ +``` + +## MultiPolygon {#multipolygon-data-type} + +Тип `MultiPolygon` описывает элемент, состоящий из нескольких простых многоугольников (полигональную сетку). Он хранится в виде массива многоугольников: [Array](array.md)([Polygon](#polygon-data-type)). 
+ +**Пример** + +Запись в этой таблице описывает элемент, состоящий из двух многоугольников — первый без дыр, а второй с одной дырой: + +```sql +SET allow_experimental_geo_types = 1; +CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory(); +INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]); +SELECT mpg, toTypeName(mpg) FROM geo_multipolygon; +``` +Result: + +``` text +┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐ +│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon │ +└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/data-types/geo/) diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md index 3b5b337d731..52713e2d747 100644 --- a/docs/ru/sql-reference/data-types/lowcardinality.md +++ b/docs/ru/sql-reference/data-types/lowcardinality.md @@ -23,7 +23,7 @@ LowCardinality(data_type) Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных. -При работе со строками используйте `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность. +При работе со строками, использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность. ## Пример diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 52f0412a177..668b579ff78 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -1,8 +1,9 @@ -# SimpleAggregateFunction {#data-type-simpleaggregatefunction} +# SimpleAggregateFunction(func, type) {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data. +Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). 
Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк, +а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется. -The following aggregate functions are supported: +Поддерживаются следующие агрегатные функции: - [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) - [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) @@ -15,22 +16,24 @@ The following aggregate functions are supported: - [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) - [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) - [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray) +- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) +- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) +- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) -Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. +!!! note "Примечание" + Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State]((../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. + + `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией. -**Parameters** +**Параметры** -- Name of the aggregate function. -- Types of the aggregate function arguments. +- `func` — имя агрегатной функции. +- `type` — типы аргументов агрегатной функции. -**Example** +**Пример** ``` sql -CREATE TABLE t -( - column1 SimpleAggregateFunction(sum, UInt64), - column2 SimpleAggregateFunction(any, String) -) ENGINE = ... 
+CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` -[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) +[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/data-types/simpleaggregatefunction/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 3bb11b638b2..77275b65a05 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -572,7 +572,7 @@ SOURCE(CLICKHOUSE( или ``` sql -SOURCE(MONGO( +SOURCE(MONGODB( host 'localhost' port 27017 user '' diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 80057e6f0e0..fe216b1aed1 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1355,6 +1355,52 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; └─────┘ ``` +**Синтаксис** + +``` sql +arraySum(arr) +``` + +**Возвращаемое значение** + +- Число. + +Тип: [Int](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md). + +**Параметры** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +```sql +SELECT arraySum([2,3]) AS res; +``` + +Результат: + +``` text +┌─res─┐ +│ 5 │ +└─────┘ +``` + +Запрос: + +``` sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Результат: + +``` text +┌─res─┐ +│ 13 │ +└─────┘ +``` + ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 31482cde77f..4fab11e39a8 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -63,40 +63,58 @@ int32samoa: 1546300800 Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD). +Синоним: `YEAR`. + ## toQuarter {#toquarter} Переводит дату или дату-с-временем в число типа UInt8, содержащее номер квартала. +Синоним: `QUARTER`. + ## toMonth {#tomonth} Переводит дату или дату-с-временем в число типа UInt8, содержащее номер месяца (1-12). +Синоним: `MONTH`. + ## toDayOfYear {#todayofyear} Переводит дату или дату-с-временем в число типа UInt16, содержащее номер дня года (1-366). +Синоним: `DAYOFYEAR`. + ## toDayOfMonth {#todayofmonth} Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в месяце (1-31). +Синонимы: `DAYOFMONTH`, `DAY`. + ## toDayOfWeek {#todayofweek} Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в неделе (понедельник - 1, воскресенье - 7). +Синоним: `DAYOFWEEK`. + ## toHour {#tohour} Переводит дату-с-временем в число типа UInt8, содержащее номер часа в сутках (0-23). Функция исходит из допущения, что перевод стрелок вперёд, если осуществляется, то на час, в два часа ночи, а перевод стрелок назад, если осуществляется, то на час, в три часа ночи (что, в общем, не верно - даже в Москве два раза перевод стрелок был осуществлён в другое время). 
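+
+Ниже приведен иллюстративный пример извлечения часа (значения даты и времени в запросе условные):
+
+Запрос:
+
+```sql
+SELECT toHour(toDateTime('2021-02-11 14:42:23')) AS hour;
+```
+
+Результат:
+
+```text
+┌─hour─┐
+│   14 │
+└──────┘
+```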
+Синоним: `HOUR`. + ## toMinute {#tominute} Переводит дату-с-временем в число типа UInt8, содержащее номер минуты в часе (0-59). +Синоним: `MINUTE`. + ## toSecond {#tosecond} Переводит дату-с-временем в число типа UInt8, содержащее номер секунды в минуте (0-59). Секунды координации не учитываются. +Синоним: `SECOND`. + ## toUnixTimestamp {#to-unix-timestamp} Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). @@ -305,7 +323,9 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. -Пример: +**Пример** + +Запрос: ```sql SELECT @@ -313,6 +333,9 @@ SELECT toYear(date), toISOYear(date) ``` + +Результат: + ```text ┌───────date─┬─toYear(toDate('2017-01-01'))─┬─toISOYear(toDate('2017-01-01'))─┐ │ 2017-01-01 │ 2017 │ 2016 │ @@ -326,12 +349,18 @@ SELECT 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. +**Пример** + +Запрос: + ```sql SELECT toISOWeek(toDate('2017-01-01')) AS ISOWeek20170101, toISOWeek(toDate('2017-01-02')) AS ISOWeek20170102 ``` +Результат: + ```text ┌─ISOWeek20170101─┬─ISOWeek20170102─┐ │ 52 │ 1 │ @@ -368,10 +397,14 @@ SELECT **Пример** +Запрос: + ```sql SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS week1, toWeek(date,9) AS week9; ``` +Результат: + ```text ┌───────date─┬─week0─┬─week1─┬─week9─┐ │ 2016-12-27 │ 52 │ 52 │ 1 │ @@ -387,10 +420,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we **Пример** +Запрос: + ```sql SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; ``` +Результат: + ```text ┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ │ 2016-12-27 │ 201652 │ 201652 │ 201701 │ @@ -573,7 +610,7 @@ dateDiff('unit', startdate, enddate, [timezone]) SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); ``` -Ответ: +Результат: ``` text ┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ @@ -654,10 +691,10 @@ formatDateTime(Time, Format\[, Timezone\]) Запрос: ``` sql -SELECT formatDateTime(toDate('2010-01-04'), '%g') +SELECT formatDateTime(toDate('2010-01-04'), '%g'); ``` -Ответ: +Результат: ``` ┌─formatDateTime(toDate('2010-01-04'), '%g')─┐ @@ -665,4 +702,43 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g') └────────────────────────────────────────────┘ ``` +## FROM\_UNIXTIME {#fromunixtime} + +Функция преобразует Unix timestamp в календарную дату и время. + +**Примеры** + +Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md). 
+ +Запрос: + +```sql +SELECT FROM_UNIXTIME(423543535); +``` + +Результат: + +```text +┌─FROM_UNIXTIME(423543535)─┐ +│ 1983-06-04 10:58:55 │ +└──────────────────────────┘ +``` + +В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). + + +Запрос: + +```sql +SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; +``` + +Результат: + +```text +┌─DateTime────────────┐ +│ 2009-02-11 14:42:23 │ +└─────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 6f1c2aad6cb..8c3065e5a77 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -75,6 +75,8 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello; Returns a string containing the argument’s hexadecimal representation. +Синоним: `HEX`. + **Syntax** ``` sql diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index f1f6516d453..0216a6b2356 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -11,7 +11,7 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448 Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются). -Обратите внимание, что эти функции работают медленно. +Обратите внимание, что до версии Clickhouse 21.1 эти функции работали медленно. ## encrypt {#encrypt} @@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Возвращаемое значение** -- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). +- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). 
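+
+Так как возвращаемое значение бинарное, для наглядного просмотра его обычно оборачивают в `hex`, а корректность шифрования можно проверить обратным вызовом [decrypt](#decrypt). Ниже приведен иллюстративный запрос с условным 16-байтовым ключом:
+
+Запрос:
+
+``` sql
+SELECT decrypt('aes-128-ecb', encrypt('aes-128-ecb', 'Secret', '1234567890123456'), '1234567890123456') AS plaintext;
+```
+
+Результат:
+
+``` text
+┌─plaintext─┐
+│ Secret    │
+└───────────┘
+```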
**Примеры** @@ -52,57 +52,38 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) ``` sql CREATE TABLE encryption_test ( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; + `comment` String, + `secret` String +) +ENGINE = Memory; ``` -Вставим эти данные: +Вставим некоторые данные (замечание: не храните ключи или инициализирующие векторы в базе данных, так как это компрометирует всю концепцию шифрования), также хранение "подсказок" небезопасно и используется только для наглядности: Запрос: ``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\ +('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\ +('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\ +('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212')); ``` -Пример без `iv`: - Запрос: ``` sql -SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Результат: ``` text -┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ -│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ -│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ -│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ -``` - -Пример с `iv`: - -Запрос: - -``` sql -SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; -``` - -Результат: - -``` text -┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ -│ aes-256-ctr │ │ -│ aes-256-ctr │ 7FB039F7 │ -│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ -└─────────────┴───────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` Пример в режиме `-gcm`: @@ -110,41 +91,27 @@ SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encrypti Запрос: ``` sql -SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \ +('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad')); + +SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; ``` Результат: ``` text -┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ -│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ -│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ -│ aes-256-gcm │ 
A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ -``` - -Пример в режиме `-gcm` и с `aad`: - -Запрос: - -``` sql -SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test; -``` - -Результат: - -``` text -┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐ -│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │ -│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │ -│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ ``` ## aes_encrypt_mysql {#aes_encrypt_mysql} Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt). +При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. + Функция поддерживает шифрофание данных следующими режимами: - aes-128-ecb, aes-192-ecb, aes-256-ecb @@ -156,7 +123,7 @@ SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM e **Синтаксис** -```sql +``` sql aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) ``` @@ -164,78 +131,96 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) - `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string). -- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). -- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). +- `key` — ключ шифрования. Если ключ длиннее, чем требует режим шифрования, производится специфичная для MySQL свертка ключа. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Необязателен, учитываются только первые 16 байтов. [String](../../sql-reference/data-types/string.md#string). **Возвращаемое значение** -- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). +- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). **Примеры** -Создадим такую таблицу: +При одинаковых входящих значениях результаты шифрования у функций `encrypt` и `aes_encrypt_mysql` совпадают. 
Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` - -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); -``` - -Пример без `iv`: - -Запрос: - -``` sql -SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal; ``` Результат: ``` text -┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ -│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ -│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ -│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ +┌─ciphertexts_equal─┐ +│ 1 │ +└───────────────────┘ ``` -Пример с `iv`: +Функция `encrypt` генерирует исключение, если `key` или `iv` длиннее чем нужно: Запрос: ``` sql -SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'); ``` Результат: ``` text -┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐ -│ aes-256-cfb128 │ │ -│ aes-256-cfb128 │ 7FB039F7 │ -│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │ -└────────────────┴────────────────────────────────────────────────────────────┘ +Received exception from server (version 21.1.2): +Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
+``` + +Однако функция `aes_encrypt_mysql` в аналогичном случае возвращает результат, который может быть обработан MySQL: + +Запрос: + +``` sql +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext; +``` + +Результат: + +```text +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +Если передать `iv` еще длиннее, результат останется таким же: + +Запрос: + +``` sql +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext +``` + +Результат: + +``` text +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +Это совпадает с результатом, возвращаемым MySQL при таких же входящих значениях: + +``` sql +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) + +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` ## decrypt {#decrypt} -Функция поддерживает расшифровку данных следующими режимами: +Функция расшифровывает зашифрованный текст и может работать в следующих режимах: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc @@ -247,7 +232,7 @@ SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) **Синтаксис** -```sql +``` sql decrypt('mode', 'ciphertext', 'key' [, iv, aad]) ``` @@ -265,52 +250,58 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Примеры** -Создадим такую таблицу: +Рассмотрим таблицу из примера для функции [encrypt](#encrypt). Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` - -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); -``` - -Запрос: - -``` sql - -SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Результат: -```text -┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐ -│ aes-128-ecb │ │ -│ aes-128-ecb │ text │ -│ aes-128-ecb │ What Is ClickHouse? 
│ -└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` text +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` +Теперь попытаемся расшифровать эти данные: + +Запрос: + +``` sql +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test; +``` + +Результат: + +``` text +┌─comment─────────────────────────────┬─plaintext─┐ +│ aes-256-cfb128 no IV │ Secret │ +│ aes-256-cfb128 no IV, different key │ �4� + � │ +│ aes-256-cfb128 with IV │ ���6�~ │ + │aes-256-cbc no IV │ �2*4�h3c�4w��@ +└─────────────────────────────────────┴───────────┘ +``` + +Обратите внимание, что только часть данных была расшифрована верно. Оставшаяся часть расшифрована некорректно, так как при шифровании использовались другие значения `mode`, `key`, или `iv`. + ## aes_decrypt_mysql {#aes_decrypt_mysql} Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). -Функция поддерживает расшифровку данных следующими режимами: +При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. + +Функция поддерживает расшифровку данных в следующих режимах: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc @@ -321,7 +312,7 @@ SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) **Синтаксис** -```sql +``` sql aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` @@ -332,51 +323,39 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) - `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). - `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). - **Возвращаемое значение** - Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string). 
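+
+Для самопроверки можно расшифровать значение, полученное функцией `aes_encrypt_mysql` с теми же параметрами. Ниже приведен иллюстративный запрос, ключ и инициализирующий вектор условные:
+
+Запрос:
+
+``` sql
+SELECT aes_decrypt_mysql('aes-256-cfb128', aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv'), '12345678910121314151617181920212', 'iviviviviviviviv') AS plaintext;
+```
+
+Результат:
+
+``` text
+┌─plaintext─┐
+│ Secret    │
+└───────────┘
+```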
**Примеры** -Создадим такую таблицу: +Расшифруем данные, которые до этого были зашифрованы в MySQL: -Запрос: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) -Вставим эти данные: - -Запрос: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` Запрос: ``` sql -SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext; ``` Результат: ``` text -┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ -│ aes-128-cbc │ │ -│ aes-128-cbc │ text │ -│ aes-128-cbc │ What Is ClickHouse? │ -└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +┌─plaintext─┐ +│ Secret │ +└───────────┘ ``` - [Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) diff --git a/docs/ru/sql-reference/functions/functions-for-nulls.md b/docs/ru/sql-reference/functions/functions-for-nulls.md index 17da1ea9194..0db55847631 100644 --- a/docs/ru/sql-reference/functions/functions-for-nulls.md +++ b/docs/ru/sql-reference/functions/functions-for-nulls.md @@ -13,6 +13,8 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u isNull(x) ``` +Синоним: `ISNULL`. + **Параметры** - `x` — значение с не составным типом данных. diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index 52f0a92bc9f..3b7379e9a65 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -9,10 +9,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u Принимает число типа UInt32. Интерпретирует его, как IPv4-адрес в big endian. Возвращает строку, содержащую соответствующий IPv4-адрес в формате A.B.C.D (числа в десятичной форме через точки). +Синоним: `INET_NTOA`. + ## IPv4StringToNum(s) {#ipv4stringtonums} Функция, обратная к IPv4NumToString. Если IPv4 адрес в неправильном формате, то возвращает 0. +Синоним: `INET_ATON`. + ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} Похоже на IPv4NumToString, но вместо последнего октета используется xxx. @@ -49,7 +53,11 @@ LIMIT 10 ### IPv6NumToString(x) {#ipv6numtostringx} Принимает значение типа FixedString(16), содержащее IPv6-адрес в бинарном виде. Возвращает строку, содержащую этот адрес в текстовом виде. -IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44. Примеры: +IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44. + +Примеры: `INET6_NTOA`. 
+ +Примеры: ``` sql SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr @@ -118,6 +126,8 @@ LIMIT 10 Функция, обратная к IPv6NumToString. Если IPv6 адрес в неправильном формате, то возвращает строку из нулевых байт. HEX может быть в любом регистре. +Alias: `INET6_ATON`. + ## IPv4ToIPv6(x) {#ipv4toipv6x} Принимает число типа `UInt32`. Интерпретирует его, как IPv4-адрес в [big endian](https://en.wikipedia.org/wiki/Endianness). Возвращает значение `FixedString(16)`, содержащее адрес IPv6 в двоичном формате. Примеры: diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 236583c211a..1159a1f5823 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -95,6 +95,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') Повторяет строку определенное количество раз и объединяет повторяемые значения в одну строку. +Синоним: `REPEAT`. + **Синтаксис** ``` sql @@ -273,10 +275,14 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2) Производит кодирование строки s в base64-представление. +Синоним: `TO_BASE64`. + ## base64Decode(s) {#base64decode} Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение +Синоним: `FROM_BASE64`. + ## tryBase64Decode(s) {#trybase64decode} Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index a36613280a1..696fdb9e5ae 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -176,4 +176,129 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type └──────────────────────────────┴───────────────────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +## mapContains {#mapcontains} + +Определяет, содержит ли контейнер `map` ключ `key`. + +**Синтаксис** + +``` sql +mapContains(map, key) +``` + +**Параметры** + +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). +- `key` — ключ. Тип соответстует типу ключей параметра `map`. + +**Возвращаемое значение** + +- `1` если `map` включает `key`, иначе `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapContains(a, 'name') FROM test; + +``` + +Результат: + +```text +┌─mapContains(a, 'name')─┐ +│ 1 │ +│ 0 │ +└────────────────────────┘ +``` + +## mapKeys {#mapkeys} + +Возвращает все ключи контейнера `map`. + +**Синтаксис** + +```sql +mapKeys(map) +``` + +**Параметры** + +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). + +**Возвращаемое значение** + +- Массив со всеми ключами контейнера `map`. + +Тип: [Array](../../sql-reference/data-types/array.md). 
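+
+Полученный массив ключей можно обрабатывать обычными функциями для массивов. Например, проверку наличия ключа (аналог [mapContains](#mapcontains)) можно записать через `has`. Иллюстративный запрос по таблице `test` из примера для `mapContains` выше:
+
+```sql
+SELECT has(mapKeys(a), 'name') FROM test;
+```
+
+Результат:
+
+```text
+┌─has(mapKeys(a), 'name')─┐
+│                       1 │
+│                       0 │
+└─────────────────────────┘
+```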
+ +**Пример** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapKeys(a) FROM test; +``` + +Результат: + +```text +┌─mapKeys(a)────────────┐ +│ ['name','age'] │ +│ ['number','position'] │ +└───────────────────────┘ +``` + +## mapValues {#mapvalues} + +Возвращает все значения контейнера `map`. + +**Синтаксис** + +```sql +mapKeys(map) +``` + +**Параметры** + +- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md). + +**Возвращаемое значение** + +- Массив со всеми значениями контейнера `map`. + +Тип: [Array](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); + +SELECT mapValues(a) FROM test; +``` + +Результат: + +```text +┌─mapValues(a)─────┐ +│ ['eleven','11'] │ +│ ['twelve','6.0'] │ +└──────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-map-functions/) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 4a314bd22d8..53e7bc1300e 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -36,10 +36,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u **Пример** +Запрос: + ``` sql -SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) +SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8); ``` +Результат: + ``` text ┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ │ -9223372036854775808 │ 32 │ 16 │ 8 │ @@ -52,10 +56,14 @@ SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) **Пример** +Запрос: + ``` sql -select toInt64OrZero('123123'), toInt8OrZero('123qwe123') +SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123'); ``` +Результат: + ``` text ┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ │ 123123 │ 0 │ @@ -68,10 +76,14 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123') **Пример** +Запрос: + ``` sql -select toInt64OrNull('123123'), toInt8OrNull('123qwe123') +SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123'); ``` +Результат: + ``` text ┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ │ 123123 │ ᴺᵁᴸᴸ │ @@ -102,10 +114,14 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123') **Пример** +Запрос: + ``` sql -SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) +SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8); ``` +Результат: + ``` text ┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ │ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ @@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) ## toDate {#todate} +Cиноним: `DATE`. 
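+
+Иллюстративный пример (дата условная; синоним `DATE` указан выше и предполагается доступным в используемой версии):
+
+Запрос:
+
+``` sql
+SELECT toDate('2021-02-10') AS d, DATE('2021-02-10') AS d_alias;
+```
+
+Результат:
+
+``` text
+┌──────────d─┬────d_alias─┐
+│ 2021-02-10 │ 2021-02-10 │
+└────────────┴────────────┘
+```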
+ ## toDateOrZero {#todateorzero} ## toDateOrNull {#todateornull} @@ -168,20 +186,28 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) **Примеры** +Запрос: + ``` sql -SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val) +SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val); ``` +Результат: + ``` text ┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ │ -1.11100 │ Nullable(Decimal(9, 5)) │ └──────────┴────────────────────────────────────────────────────┘ ``` +Запрос: + ``` sql -SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) +SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val); ``` +Результат: + ``` text ┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ │ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ @@ -213,20 +239,28 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) **Пример** +Запрос: + ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val) +SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val); ``` +Результат: + ``` text ┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ │ -1.11100 │ Decimal(9, 5) │ └──────────┴────────────────────────────────────────────────────┘ ``` +Запрос: + ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val) +SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val); ``` +Результат: + ``` text ┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ │ 0.00 │ Decimal(9, 2) │ @@ -258,12 +292,18 @@ YYYY-MM-DD hh:mm:ss Дополнительно, функция toString от аргумента типа DateTime может принимать второй аргумент String - имя тайм-зоны. Пример: `Asia/Yekaterinburg` В этом случае, форматирование времени производится согласно указанной тайм-зоне. +**Пример** + +Запрос: + ``` sql SELECT now() AS now_local, - toString(now(), 'Asia/Yekaterinburg') AS now_yekat + toString(now(), 'Asia/Yekaterinburg') AS now_yekat; ``` +Результат: + ``` text ┌───────────now_local─┬─now_yekat───────────┐ │ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │ @@ -281,22 +321,30 @@ SELECT Принимает аргумент типа String или FixedString. Возвращает String, вырезая содержимое строки до первого найденного нулевого байта. -Пример: +**Примеры** + +Запрос: ``` sql -SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut +SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut; ``` +Результат: + ``` text ┌─s─────────────┬─s_cut─┐ │ foo\0\0\0\0\0 │ foo │ └───────────────┴───────┘ ``` +Запрос: + ``` sql -SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut +SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; ``` +Результат: + ``` text ┌─s──────────┬─s_cut─┐ │ foo\0bar\0 │ foo │ @@ -344,7 +392,7 @@ reinterpretAsUUID(fixed_string) Запрос: ``` sql -SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) +SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))); ``` Результат: @@ -377,10 +425,15 @@ SELECT uuid = uuid2; ## CAST(x, T) {#type_conversion_function-cast} -Преобразует x в тип данных t. -Поддерживается также синтаксис CAST(x AS t). +Преобразует входное значение `x` в указанный тип данных `T`. -Пример: +Поддерживается также синтаксис `CAST(x AS t)`. + +Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. 
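+
+Это поведение можно проверить таким запросом (приведен для иллюстрации):
+
+``` sql
+SELECT CAST(-1, 'UInt8') AS overflow;
+```
+
+Результат:
+
+``` text
+┌─overflow─┐
+│      255 │
+└──────────┘
+```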
+ +**Пример** + +Запрос: ``` sql SELECT @@ -388,9 +441,11 @@ SELECT CAST(timestamp AS DateTime) AS datetime, CAST(timestamp AS Date) AS date, CAST(timestamp, 'String') AS string, - CAST(timestamp, 'FixedString(22)') AS fixed_string + CAST(timestamp, 'FixedString(22)') AS fixed_string; ``` +Результат: + ``` text ┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐ │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │ @@ -399,12 +454,18 @@ SELECT Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N). -Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. Пример: +Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. + +**Примеры** + +Запрос: ``` sql -SELECT toTypeName(x) FROM t_null +SELECT toTypeName(x) FROM t_null; ``` +Результат: + ``` text ┌─toTypeName(x)─┐ │ Int8 │ @@ -412,10 +473,14 @@ SELECT toTypeName(x) FROM t_null └───────────────┘ ``` +Запрос: + ``` sql -SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null +SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null; ``` +Результат: + ``` text ┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐ │ Nullable(UInt16) │ @@ -427,6 +492,93 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null - Настройка [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) +## accurateCast(x, T) {#type_conversion_function-accurate-cast} + +Преобразует входное значение `x` в указанный тип данных `T`. + +В отличие от функции [cast(x, T)](#type_conversion_function-cast), `accurateCast` не допускает переполнения при преобразовании числовых типов. Например, `accurateCast(-1, 'UInt8')` вызовет исключение. + +**Примеры** + +Запрос: + +``` sql +SELECT cast(-1, 'UInt8') as uint8; +``` + +Результат: + +``` text +┌─uint8─┐ +│ 255 │ +└───── + +Запрос: + +```sql +SELECT accurateCast(-1, 'UInt8') as uint8; +``` + +Результат: + +``` text +Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8. +``` + +## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} + +Преобразует входное значение `x` в указанный тип данных `T`. + +Всегда возвращает тип [Nullable](../../sql-reference/data-types/nullable.md). Если исходное значение не может быть преобразовано к целевому типу, возвращает [NULL](../../sql-reference/syntax.md#null-literal). + +**Синтаксис** + +```sql +accurateCastOrNull(x, T) +``` + +**Параметры** + +- `x` — входное значение. +- `T` — имя возвращаемого типа данных. + +**Возвращаемое значение** + +- Значение, преобразованное в указанный тип `T`. 
+ +**Примеры** + +Запрос: + +``` sql +SELECT toTypeName(accurateCastOrNull(5, 'UInt8')); +``` + +Результат: + +``` text +┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐ +│ Nullable(UInt8) │ +└────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT + accurateCastOrNull(-1, 'UInt8') as uint8, + accurateCastOrNull(128, 'Int8') as int8, + accurateCastOrNull('Test', 'FixedString(2)') as fixed_string; +``` + +Результат: + +``` text +┌─uint8─┬─int8─┬─fixed_string─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└───────┴──────┴──────────────┘ +``` + ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql-reference/data-types/special-data-types/interval.md). @@ -454,6 +606,8 @@ toIntervalYear(number) **Пример** +Запрос: + ``` sql WITH toDate('2019-01-01') AS date, @@ -461,9 +615,11 @@ WITH toIntervalWeek(1) AS interval_to_week SELECT date + interval_week, - date + interval_to_week + date + interval_to_week; ``` +Результат: + ``` text ┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ │ 2019-01-08 │ 2019-01-08 │ @@ -479,7 +635,7 @@ SELECT **Синтаксис** ``` sql -parseDateTimeBestEffort(time_string[, time_zone]); +parseDateTimeBestEffort(time_string[, time_zone]) ``` **Параметры** @@ -522,7 +678,7 @@ AS parseDateTimeBestEffort; ``` sql SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Результат: @@ -537,7 +693,7 @@ AS parseDateTimeBestEffort ``` sql SELECT parseDateTimeBestEffort('1284101485') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Результат: @@ -552,7 +708,7 @@ AS parseDateTimeBestEffort ``` sql SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') -AS parseDateTimeBestEffort +AS parseDateTimeBestEffort; ``` Результат: @@ -566,7 +722,7 @@ AS parseDateTimeBestEffort Запрос: ``` sql -SELECT parseDateTimeBestEffort('10 20:19') +SELECT parseDateTimeBestEffort('10 20:19'); ``` Результат: @@ -591,7 +747,7 @@ SELECT parseDateTimeBestEffort('10 20:19') **Синтаксис** ``` sql -parseDateTimeBestEffortUS(time_string [, time_zone]); +parseDateTimeBestEffortUS(time_string [, time_zone]) ``` **Параметры** @@ -620,7 +776,7 @@ SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57') AS parseDateTimeBestEffortUS; ``` -Ответ: +Результат: ``` text ┌─parseDateTimeBestEffortUS─┐ @@ -635,7 +791,7 @@ SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57') AS parseDateTimeBestEffortUS; ``` -Ответ: +Результат: ``` text ┌─parseDateTimeBestEffortUS─┐ @@ -650,7 +806,7 @@ SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57') AS parseDateTimeBestEffortUS; ``` -Ответ: +Результат: ``` text ┌─parseDateTimeBestEffortUS─┐ @@ -658,6 +814,178 @@ AS parseDateTimeBestEffortUS; └─────────────────────────——┘ ``` +## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull} + +Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает `NULL`, если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md). + +**Синтаксис** + +``` sql +parseDateTimeBestEffortUSOrNull(time_string[, time_zone]) +``` + +**Параметры** + +- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md). 
+- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md). + +**Поддерживаемые нестандартные форматы** + +- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр. +- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д. +- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д. +- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`. +- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`. + +**Возвращаемые значения** + +- `time_string`, преобразованная в тип данных `DateTime`. +- `NULL`, если входная строка не может быть преобразована в тип данных `DateTime`. + +**Примеры** + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-10 21:12:57 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-11 00:12:57 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ 2021-02-10 00:00:00 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrNull─┐ +│ ᴺᵁᴸᴸ │ +└─────────────────────────────────┘ +``` + +## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero} + +Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает нулевую дату (`1970-01-01`) или нулевую дату со временем (`1970-01-01 00:00:00`), если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md). + +**Синтаксис** + +``` sql +parseDateTimeBestEffortUSOrZero(time_string[, time_zone]) +``` + +**Параметры** + +- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md). +- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md). + +**Поддерживаемые нестандартные форматы** + +- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр. +- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д. +- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д. +- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. 
В этом случае `YYYY-MM` заменяется на `2000-01`. +- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`. + +**Возвращаемые значения** + +- `time_string`, преобразованная в тип данных `DateTime`. +- Нулевая дата или нулевая дата со временем, если входная строка не может быть преобразована в тип данных `DateTime`. + +**Примеры** + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-10 21:12:57 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-11 00:12:57 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 2021-02-10 00:00:00 │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero; +``` + +Результат: + +``` text +┌─parseDateTimeBestEffortUSOrZero─┐ +│ 1970-01-01 00:00:00 │ +└─────────────────────────────────┘ +``` + ## toUnixTimestamp64Milli ## toUnixTimestamp64Micro ## toUnixTimestamp64Nano @@ -685,10 +1013,10 @@ toUnixTimestamp64Milli(value) ``` sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 -SELECT toUnixTimestamp64Milli(dt64) +SELECT toUnixTimestamp64Milli(dt64); ``` -Ответ: +Результат: ``` text ┌─toUnixTimestamp64Milli(dt64)─┐ @@ -700,10 +1028,10 @@ SELECT toUnixTimestamp64Milli(dt64) ``` sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 -SELECT toUnixTimestamp64Nano(dt64) +SELECT toUnixTimestamp64Nano(dt64); ``` -Ответ: +Результат: ``` text ┌─toUnixTimestamp64Nano(dt64)─┐ @@ -738,10 +1066,10 @@ fromUnixTimestamp64Milli(value [, ti]) ``` sql WITH CAST(1234567891011, 'Int64') AS i64 -SELECT fromUnixTimestamp64Milli(i64, 'UTC') +SELECT fromUnixTimestamp64Milli(i64, 'UTC'); ``` -Ответ: +Результат: ``` text ┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ @@ -772,12 +1100,12 @@ toLowCardinality(expr) Тип: `LowCardinality(expr_result_type)` -**Example** +**Пример** Запрос: ```sql -SELECT toLowCardinality('1') +SELECT toLowCardinality('1'); ``` Результат: @@ -813,10 +1141,10 @@ formatRow(format, x, y, ...) ``` sql SELECT formatRow('CSV', number, 'good') -FROM numbers(3) +FROM numbers(3); ``` -Ответ: +Результат: ``` text ┌─formatRow('CSV', number, 'good')─┐ @@ -854,10 +1182,10 @@ formatRowNoNewline(format, x, y, ...) ``` sql SELECT formatRowNoNewline('CSV', number, 'good') -FROM numbers(3) +FROM numbers(3); ``` -Ответ: +Результат: ``` text ┌─formatRowNoNewline('CSV', number, 'good')─┐ diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index 4c1290df166..e0412747898 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ... Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом. -Не перечисляйте слишком большое количество значений (миллионы) явно. 
Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел «Внешние данные для обработки запроса»), и затем воспользоваться подзапросом. +Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел [Внешние данные для обработки запроса](../../engines/table-engines/special/external-data.md)), и затем воспользоваться подзапросом. В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках. +Если типы данных в левой и правой частях подзапроса `IN` различаются, ClickHouse преобразует значение в левой части к типу данных из правой части. Преобразование выполняется по аналогии с функцией [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null), т.е. тип данных становится [Nullable](../../sql-reference/data-types/nullable.md), а если преобразование не может быть выполнено, возвращается значение [NULL](../../sql-reference/syntax.md#null-literal). + +**Пример** + +Запрос: + +``` sql +SELECT '1' IN (SELECT 1); +``` + +Результат: + +``` text +┌─in('1', _subquery49)─┐ +│ 1 │ +└──────────────────────┘ +``` + Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. diff --git a/docs/ru/sql-reference/statements/select/all.md b/docs/ru/sql-reference/statements/select/all.md new file mode 100644 index 00000000000..4049d77a173 --- /dev/null +++ b/docs/ru/sql-reference/statements/select/all.md @@ -0,0 +1,22 @@ +--- +toc_title: ALL +--- + +# Секция ALL {#select-all} + +Если в таблице несколько совпадающих строк, то `ALL` возвращает все из них. Поведение запроса `SELECT ALL` точно такое же, как и `SELECT` без аргумента `DISTINCT`. Если указаны оба аргумента: `ALL` и `DISTINCT`, функция вернет исключение. + + +`ALL` может быть указан внутри агрегатной функции, например, результат выполнения запроса: + +```sql +SELECT sum(ALL number) FROM numbers(10); +``` + +равен результату выполнения запроса: + +```sql +SELECT sum(number) FROM numbers(10); +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/all) diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index d3e6e106125..f9bdf902ad8 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -5,23 +5,27 @@ toc_title: file # file {#file} -Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [file](file.md) и [hdfs](hdfs.md). +Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [url](../../sql-reference/table-functions/url.md) и [hdfs](../../sql-reference/table-functions/hdfs.md). 
+ +Функция `file` может использоваться в запросах `SELECT` и `INSERT` при работе с движком таблиц [File](../../engines/table-engines/special/file.md). + +**Синтаксис** ``` sql file(path, format, structure) ``` -**Входные параметры** +**Параметры** -- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, \``'abc', 'def'` — строки. +- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки. - `format` — [формат](../../interfaces/formats.md#formats) файла. -- `structure` — структура таблицы. Формат `'colunmn1_name column1_ype, column2_name column2_type, ...'`. +- `structure` — структура таблицы. Формат: `'colunmn1_name column1_ype, column2_name column2_type, ...'`. **Возвращаемое значение** Таблица с указанной структурой, предназначенная для чтения или записи данных в указанном файле. -**Пример** +**Примеры** Настройка `user_files_path` и содержимое файла `test.csv`: @@ -35,12 +39,10 @@ $ cat /var/lib/clickhouse/user_files/test.csv 78,43,45 ``` -Таблица из `test.csv` и выборка первых двух строк из неё: +Получение данных из таблицы в файле `test.csv` и выборка первых двух строк из неё: ``` sql -SELECT * -FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2 +SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; ``` ``` text @@ -50,45 +52,61 @@ LIMIT 2 └─────────┴─────────┴─────────┘ ``` -Шаблоны могут содержаться в нескольких компонентах пути. Обрабатываются только существующие файлы, название которых целиком удовлетворяет шаблону (не только суффиксом или префиксом). +Получение первых 10 строк таблицы, содержащей 3 столбца типа [UInt32](../../sql-reference/data-types/int-uint.md), из CSV-файла: -- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов. -- `?` — Заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). +``` sql +SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; +``` + +Вставка данных из файла в таблицу: + +``` sql +INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1); +SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## Шаблоны поиска в компонентах пути {#globs-in-path} + +При описании пути к файлу могут использоваться шаблоны поиска. Обрабатываются только те файлы, у которых путь и название соответствуют шаблону полностью (а не только префикс или суффикс). + +- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. +- `?` — заменяет ровно один любой символ. 
+- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). **Пример** -1. Предположим у нас есть несколько файлов со следующими относительными путями: +Предположим, у нас есть несколько файлов со следующими относительными путями: -- ‘some_dir/some_file_1’ -- ‘some_dir/some_file_2’ -- ‘some_dir/some_file_3’ -- ‘another_dir/some_file_1’ -- ‘another_dir/some_file_2’ -- ‘another_dir/some_file_3’ +- 'some_dir/some_file_1' +- 'some_dir/some_file_2' +- 'some_dir/some_file_3' +- 'another_dir/some_file_1' +- 'another_dir/some_file_2' +- 'another_dir/some_file_3' -1. Запросим количество строк в этих файлах: - - +Запросим количество строк в этих файлах: ``` sql -SELECT count(*) -FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32') +SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32'); ``` -1. Запросим количество строк во всех файлах этих двух директорий: - - +Запросим количество строк во всех файлах этих двух директорий: ``` sql -SELECT count(*) -FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') +SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -!!! warning "Warning" +!!! warning "Предупреждение" Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. **Пример** @@ -96,17 +114,16 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: ``` sql -SELECT count(*) -FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32') +SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); ``` ## Виртуальные столбцы {#virtualnye-stolbtsy} -- `_path` — Путь к файлу. -- `_file` — Имя файла. +- `_path` — путь к файлу. +- `_file` — имя файла. **Смотрите также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/file/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/file/) diff --git a/docs/ru/sql-reference/table-functions/mysql.md b/docs/ru/sql-reference/table-functions/mysql.md index 21841eee67a..18b34d0bf6c 100644 --- a/docs/ru/sql-reference/table-functions/mysql.md +++ b/docs/ru/sql-reference/table-functions/mysql.md @@ -7,6 +7,8 @@ toc_title: mysql Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере. +**Синтаксис** + ``` sql mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); ``` @@ -23,13 +25,13 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ - `password` — пароль пользователя. -- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Если `replace_query=1`, то запрос заменяется. +- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Возможные значения: + - `0` - выполняется запрос `INSERT INTO`. + - `1` - выполняется запрос `REPLACE INTO`. -- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. 
+- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. Может быть передано только с помощью `replace_query = 0` (если вы одновременно передадите `replace_query = 1` и `on_duplicate_clause`, будет сгенерировано исключение). - Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`. Чтобы узнать какие `on_duplicate_clause` можно использовать с секцией `ON DUPLICATE KEY` обратитесь к документации MySQL. - - Чтобы указать `'on_duplicate_clause'` необходимо передать `0` в параметр `replace_query`. Если одновременно передать `replace_query = 1` и `'on_duplicate_clause'`, то ClickHouse сгенерирует исключение. + Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1;` Простые условия `WHERE` такие как `=, !=, >, >=, <, =` выполняются на стороне сервера MySQL. @@ -39,46 +41,59 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ Объект таблицы с теми же столбцами, что и в исходной таблице MySQL. -## Пример использования {#primer-ispolzovaniia} +!!! note "Примечание" + Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком имен столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. + +**Примеры** Таблица в MySQL: ``` text mysql> CREATE TABLE `test`.`test` ( -> `int_id` INT NOT NULL AUTO_INCREMENT, - -> `int_nullable` INT NULL DEFAULT NULL, -> `float` FLOAT NOT NULL, - -> `float_nullable` FLOAT NULL DEFAULT NULL, -> PRIMARY KEY (`int_id`)); -Query OK, 0 rows affected (0,09 sec) -mysql> insert into test (`int_id`, `float`) VALUES (1,2); -Query OK, 1 row affected (0,00 sec) +mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2); -mysql> select * from test; -+--------+--------------+-------+----------------+ -| int_id | int_nullable | float | float_nullable | -+--------+--------------+-------+----------------+ -| 1 | NULL | 2 | NULL | -+--------+--------------+-------+----------------+ -1 row in set (0,00 sec) +mysql> SELECT * FROM test; ++--------+-------+ +| int_id | float | ++--------+-------+ +| 1 | 2 | ++--------+-------+ ``` Получение данных в ClickHouse: ``` sql -SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') +SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); ``` ``` text -┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐ -│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │ -└────────┴──────────────┴───────┴────────────────┘ +┌─int_id─┬─float─┐ +│ 1 │ 2 │ +└────────┴───────┘ ``` -## Смотрите также {#smotrite-takzhe} +Замена и вставка: + +```sql +INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3); +INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4); +SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); +``` + +``` text +┌─int_id─┬─float─┐ +│ 1 │ 3 │ +│ 2 │ 4 │ +└────────┴───────┘ +``` + +**Смотрите также** - [Движок таблиц ‘MySQL’](../../sql-reference/table-functions/mysql.md) - [Использование MySQL как источника данных для внешнего словаря](../../sql-reference/table-functions/mysql.md#dicts-external_dicts_dict_sources-mysql) -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/mysql/) +[Оригинальная 
статья](https://clickhouse.tech/docs/ru/sql-reference/table_functions/mysql/) diff --git a/docs/ru/sql-reference/table-functions/remote.md b/docs/ru/sql-reference/table-functions/remote.md index 901317a805d..83b3687f61d 100644 --- a/docs/ru/sql-reference/table-functions/remote.md +++ b/docs/ru/sql-reference/table-functions/remote.md @@ -5,9 +5,11 @@ toc_title: remote # remote, remoteSecure {#remote-remotesecure} -Позволяет обратиться к удалённым серверам без создания таблицы типа `Distributed`. +Позволяет обратиться к удалённым серверам без создания таблицы типа [Distributed](../../engines/table-engines/special/distributed.md). Функция `remoteSecure` работает аналогично `remote`, но использует защищенное соединение. -Сигнатуры: +Обе функции могут использоваться в запросах `SELECT` и `INSERT`. + +**Синтаксис** ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) @@ -16,12 +18,40 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` -`addresses_expr` - выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера - это `хост:порт`, или только `хост`. Хост может быть указан в виде имени сервера, или в виде IPv4 или IPv6 адреса. IPv6 адрес указывается в квадратных скобках. Порт - TCP-порт удалённого сервера. Если порт не указан, используется `tcp_port` из конфигурационного файла сервера (по умолчанию - 9000). +**Параметры** + +- `addresses_expr` — выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера — это `host:port` или только `host`. + + Вместо параметра `host` может быть указано имя сервера или его адрес в формате IPv4 или IPv6. IPv6 адрес указывается в квадратных скобках. + + `port` — TCP-порт удалённого сервера. Если порт не указан, используется [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) из конфигурационного файла сервера, к которому обратились через функцию `remote` (по умолчанию - 9000), и [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure), к которому обратились через функцию `remoteSecure` (по умолчанию — 9440). -!!! important "Важно" С IPv6-адресом обязательно нужно указывать порт. -Примеры: + Тип: [String](../../sql-reference/data-types/string.md). + +- `db` — имя базы данных. Тип: [String](../../sql-reference/data-types/string.md). +- `table` — имя таблицы. Тип: [String](../../sql-reference/data-types/string.md). +- `user` — имя пользователя. Если пользователь не указан, то по умолчанию `default`. Тип: [String](../../sql-reference/data-types/string.md). +- `password` — пароль. Если пароль не указан, то используется пустой пароль. Тип: [String](../../sql-reference/data-types/string.md). +- `sharding_key` — ключ шардирования для поддержки распределения данных между узлами. Например: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Тип: [UInt32](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +Набор данных с удаленных серверов. + +**Использование** + +Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае соединения с серверами устанавливаются заново при каждом запросе. Если указываются имена серверов, то приходится также выполнять поиск сервера по имени. 
Кроме того, не ведётся сквозной подсчёт ошибок при работе с разными репликами. При обработке большого количества запросов всегда создавайте таблицу типа `Distributed`, использовать табличную функцию `remote` в таких случаях не рекомендуется. + +Табличная функция `remote` может быть полезна в следующих случаях: + +- Обращение на конкретный сервер для сравнения данных, отладки и тестирования. +- Запросы между разными кластерами ClickHouse для исследований. +- Нечастые распределённые запросы, задаваемые вручную. +- Распределённые запросы, где набор серверов определяется каждый раз заново. + +**Адреса** ``` text example01-01-1 @@ -32,9 +62,7 @@ localhost [2a02:6b8:0:1111::11]:9000 ``` -Адреса можно указать через запятую, в этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными. - -Пример: +Адреса можно указать через запятую. В этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными. Пример: ``` text example01-01-1,example01-02-1 @@ -46,38 +74,36 @@ example01-01-1,example01-02-1 example01-0{1,2}-1 ``` -В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае, диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом: +В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом: ``` text example01-{01..02}-1 ``` -При наличии нескольких пар фигурных скобок, генерируется прямое произведение соответствующих множеств. +При наличии нескольких пар фигурных скобок генерируется прямое произведение соответствующих множеств. -Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае, соответствующие множества адресов понимаются как реплики - запрос будет отправлен на первую живую реплику. При этом, реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md). - -Пример: +Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае соответствующие множества адресов понимаются как реплики — запрос будет отправлен на первую живую реплику. При этом реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md#settings-load_balancing). В этом примере указаны два шарда, в каждом из которых имеются две реплики: ``` text example01-{01..02}-{1|2} ``` -В этом примере указано два шарда, в каждом из которых имеется две реплики. +Количество генерируемых адресов ограничено константой. Сейчас это 1000 адресов. -Количество генерируемых адресов ограничено константой - сейчас это 1000 штук. +**Примеры** -Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае, соединения с серверами устанавливаются заново при каждом запросе, в случае задания имён хостов, делается резолвинг имён, а также не ведётся подсчёт ошибок при работе с разными репликами. 
При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее, не используйте табличную функцию `remote`. +Выборка данных с удаленного сервера: -Табличная функция `remote` может быть полезна для следующих случаях: +``` sql +SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3; +``` -- обращение на конкретный сервер в целях сравнения данных, отладки и тестирования; -- запросы между разными кластерами ClickHouse в целях исследований; -- нечастых распределённых запросов, задаваемых вручную; -- распределённых запросов, где набор серверов определяется каждый раз заново. +Вставка данных с удаленного сервера в таблицу: -Если пользователь не задан,то используется `default`. -Если пароль не задан, то используется пустой пароль. +``` sql +CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory; +INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), 'remote_table') VALUES ('test', 42); +SELECT * FROM remote_table; +``` -`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованному каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440. - -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/remote/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/remote/) diff --git a/docs/ru/sql-reference/table-functions/url.md b/docs/ru/sql-reference/table-functions/url.md index 0cd7c24c663..043a9231e75 100644 --- a/docs/ru/sql-reference/table-functions/url.md +++ b/docs/ru/sql-reference/table-functions/url.md @@ -5,21 +5,40 @@ toc_title: url # url {#url} -`url(URL, format, structure)` - возвращает таблицу со столбцами, указанными в -`structure`, созданную из данных находящихся по `URL` в формате `format`. +Функция `url` берет данные по указанному адресу `URL` и создает из них таблицу указанной структуры со столбцами указанного формата. -URL - адрес, по которому сервер принимает `GET` и/или `POST` запросы по -протоколу HTTP или HTTPS. +Функция `url` может быть использована в запросах `SELECT` и `INSERT` с таблицами на движке [URL](../../engines/table-engines/special/url.md). -format - [формат](../../interfaces/formats.md#formats) данных. - -structure - структура таблицы в форме `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. - -**Пример** +**Синтаксис** ``` sql --- получение 3-х строк таблицы, состоящей из двух колонк типа String и UInt32 от сервера, отдающего данные в формате CSV -SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3 +url(URL, format, structure) ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/url/) +**Параметры** + +- `URL` — HTTP или HTTPS-адрес сервера, который может принимать запросы `GET` или `POST` (для запросов `SELECT` или `INSERT` соответственно). Тип: [String](../../sql-reference/data-types/string.md). +- `format` — [формат](../../interfaces/formats.md#formats) данных. Тип: [String](../../sql-reference/data-types/string.md). +- `structure` — структура таблицы в формате `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. Тип: [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Таблица с указанными форматом и структурой, а также с данными, полученными из указанного адреса `URL`. 
+ +**Примеры** + +Получение с HTTP-сервера первых 3 строк таблицы с данными в формате [CSV](../../interfaces/formats.md/#csv), содержащей столбцы типа [String](../../sql-reference/data-types/string.md) и [UInt32](../../sql-reference/data-types/int-uint.md). + +``` sql +SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; +``` + +Вставка данных в таблицу: + +``` sql +CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory; +INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42); +SELECT * FROM test_table; +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/url/) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 8404586d394..ef72624e7ab 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -66,6 +66,7 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int SYSTEM_ERROR; extern const int NOT_ENOUGH_SPACE; + extern const int CANNOT_KILL; } } @@ -886,6 +887,27 @@ namespace fmt::print("Sent kill signal.\n", pid); else throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR); + + /// Wait for the process (100 seconds). + constexpr size_t num_kill_check_tries = 1000; + constexpr size_t kill_check_delay_ms = 100; + for (size_t i = 0; i < num_kill_check_tries; ++i) + { + fmt::print("Waiting for server to be killed\n"); + if (!isRunning(pid_file)) + { + fmt::print("Server exited\n"); + break; + } + sleepForMilliseconds(kill_check_delay_ms); + } + + if (isRunning(pid_file)) + { + throw Exception(ErrorCodes::CANNOT_KILL, + "The server process still exists after %zu ms", + num_kill_check_tries, kill_check_delay_ms); + } } return 0; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f501e182cb7..72db8f59494 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -82,6 +82,7 @@ #if defined(OS_LINUX) # include +# include # include #endif @@ -480,16 +481,26 @@ int Server::main(const std::vector & /*args*/) } else { - throw Exception(ErrorCodes::CORRUPTED_DATA, - "Calculated checksum of the ClickHouse binary ({0}) does not correspond" - " to the reference checksum stored in the binary ({1})." - " It may indicate one of the following:" - " - the file {2} was changed just after startup;" - " - the file {2} is damaged on disk due to faulty hardware;" - " - the loaded executable is damaged in memory due to faulty hardware;" - " - the file {2} was intentionally modified;" - " - logical error in code." - , calculated_binary_hash, stored_binary_hash, executable_path); + /// If program is run under debugger, ptrace will fail. + if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1) + { + /// Program is run under debugger. Modification of it's binary image is ok for breakpoints. + LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).", + calculated_binary_hash); + } + else + { + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Calculated checksum of the ClickHouse binary ({0}) does not correspond" + " to the reference checksum stored in the binary ({1})." 
+ " It may indicate one of the following:" + " - the file {2} was changed just after startup;" + " - the file {2} is damaged on disk due to faulty hardware;" + " - the loaded executable is damaged in memory due to faulty hardware;" + " - the file {2} was intentionally modified;" + " - logical error in code." + , calculated_binary_hash, stored_binary_hash, executable_path); + } } } else @@ -1006,17 +1017,6 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created" " (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe)."); - if (has_zookeeper && config().has("distributed_ddl")) - { - /// DDL worker should be started after all tables were loaded - String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/"); - int pool_size = config().getInt("distributed_ddl.pool_size", 1); - if (pool_size < 1) - throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), - "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID)); - } - std::unique_ptr dns_cache_updater; if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache")) { @@ -1298,6 +1298,37 @@ int Server::main(const std::vector & /*args*/) std::thread::hardware_concurrency()); } + /// try to load dictionaries immediately, throw on error and die + ext::scope_guard dictionaries_xmls, models_xmls; + try + { + if (!config().getBool("dictionaries_lazy_load", true)) + { + global_context->tryCreateEmbeddedDictionaries(); + global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true); + } + dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository( + std::make_unique(config(), "dictionaries_config")); + models_xmls = global_context->getExternalModelsLoader().addConfigRepository( + std::make_unique(config(), "models_config")); + } + catch (...) 
+ { + LOG_ERROR(log, "Caught exception while loading dictionaries."); + throw; + } + + if (has_zookeeper && config().has("distributed_ddl")) + { + /// DDL worker should be started after all tables were loaded + String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/"); + int pool_size = config().getInt("distributed_ddl.pool_size", 1); + if (pool_size < 1) + throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), + "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID)); + } + LOG_INFO(log, "Ready for connections."); SCOPE_EXIT({ @@ -1347,26 +1378,6 @@ int Server::main(const std::vector & /*args*/) } }); - /// try to load dictionaries immediately, throw on error and die - ext::scope_guard dictionaries_xmls, models_xmls; - try - { - if (!config().getBool("dictionaries_lazy_load", true)) - { - global_context->tryCreateEmbeddedDictionaries(); - global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true); - } - dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository( - std::make_unique(config(), "dictionaries_config")); - models_xmls = global_context->getExternalModelsLoader().addConfigRepository( - std::make_unique(config(), "models_config")); - } - catch (...) - { - LOG_ERROR(log, "Caught exception while loading dictionaries."); - throw; - } - std::vector> metrics_transmitters; for (const auto & graphite_key : DB::getMultipleKeysFromConfig(config(), "", "graphite")) { diff --git a/programs/server/config.xml b/programs/server/config.xml index ba9b8b04b05..b72cf53ca03 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -892,6 +892,19 @@ + + + + + + + + + + + diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 061077dd8fa..c0011b6ebec 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -106,6 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( bool has_null_arguments) const { String name = getAliasToOrName(name_param); + bool is_case_insensitive = false; Value found; /// Find by exact match. @@ -115,7 +116,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( } if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + { found = jt->second; + is_case_insensitive = true; + } const Context * query_context = nullptr; if (CurrentThread::isInitialized()) @@ -126,7 +130,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( out_properties = found.properties; if (query_context && query_context->getSettingsRef().log_queries) - query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name); + query_context->addQueryFactoriesInfo( + Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name); /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. 
if (!out_properties.returns_default_when_only_null && has_null_arguments) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index f6a473546f9..3233199f01e 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -118,6 +118,8 @@ public: WhichDataType value_type_to_check(value_type); /// Do not promote decimal because of implementation issues of this function design + /// Currently we cannot get result column type in case of decimal we cannot get decimal scale + /// in method void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override /// If we decide to make this function more efficient we should promote decimal type during summ if (value_type_to_check.isDecimal()) result_type = value_type_without_nullable; diff --git a/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/src/AggregateFunctions/ReservoirSamplerDeterministic.h index 3b7817e9308..3013a17e1ca 100644 --- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -56,7 +56,7 @@ class ReservoirSamplerDeterministic { bool good(const UInt32 hash) { - return hash == ((hash >> skip_degree) << skip_degree); + return !(hash & skip_mask); } public: @@ -135,11 +135,8 @@ public: throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size"); sorted = false; - if (b.skip_degree > skip_degree) - { - skip_degree = b.skip_degree; - thinOut(); - } + if (skip_degree < b.skip_degree) + setSkipDegree(b.skip_degree); for (const auto & sample : b.samples) if (good(sample.second)) @@ -184,22 +181,39 @@ private: size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not). bool sorted = false; Array samples; - UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements in average. + + /// The number N determining that we store only one per 2^N elements in average. + UInt8 skip_degree = 0; + + /// skip_mask is calculated as (2 ^ skip_degree - 1). We store an element only if (hash & skip_mask) == 0. + /// For example, if skip_degree==0 then skip_mask==0 means we store each element; + /// if skip_degree==1 then skip_mask==0b0001 means we store one per 2 elements in average; + /// if skip_degree==4 then skip_mask==0b1111 means we store one per 16 elements in average. + UInt32 skip_mask = 0; void insertImpl(const T & v, const UInt32 hash) { /// Make a room for plus one element. 
while (samples.size() >= max_sample_size) - { - ++skip_degree; - if (skip_degree > detail::MAX_SKIP_DEGREE) - throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED}; - thinOut(); - } + setSkipDegree(skip_degree + 1); samples.emplace_back(v, hash); } + void setSkipDegree(UInt8 skip_degree_) + { + if (skip_degree_ == skip_degree) + return; + if (skip_degree_ > detail::MAX_SKIP_DEGREE) + throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED}; + skip_degree = skip_degree_; + if (skip_degree == detail::MAX_SKIP_DEGREE) + skip_mask = static_cast(-1); + else + skip_mask = (1 << skip_degree) - 1; + thinOut(); + } + void thinOut() { samples.resize(std::distance(samples.begin(), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 215a13cce1a..b80bcfdf4d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,8 +101,8 @@ endif() list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) -list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp) -list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h) +list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp) +list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h Functions/FunctionsLogical.h) list (APPEND dbms_sources AggregateFunctions/AggregateFunctionFactory.cpp diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 164b9565633..80d44a336a5 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -109,6 +109,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts) } in = std::make_shared(*socket); + in->setAsyncCallback(std::move(async_callback)); + out = std::make_shared(*socket); connected = true; @@ -753,15 +755,8 @@ std::optional Connection::checkPacket(size_t timeout_microseconds) } -Packet Connection::receivePacket(std::function async_callback) +Packet Connection::receivePacket() { - in->setAsyncCallback(std::move(async_callback)); - SCOPE_EXIT({ - /// disconnect() will reset "in". - if (in) - in->setAsyncCallback({}); - }); - try { Packet res; diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 2d24b143d7a..8e653dc4153 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -27,7 +27,6 @@ #include #include - namespace DB { @@ -175,8 +174,7 @@ public: std::optional checkPacket(size_t timeout_microseconds = 0); /// Receive packet from server. - /// Each time read blocks and async_callback is set, it will be called. You can poll socket inside it. - Packet receivePacket(std::function async_callback = {}); + Packet receivePacket(); /// If not connected yet, or if connection is broken - then connect. If cannot connect - throw an exception. void forceConnected(const ConnectionTimeouts & timeouts); @@ -195,6 +193,16 @@ public: size_t outBytesCount() const { return out ? out->count() : 0; } size_t inBytesCount() const { return in ? 
in->count() : 0; } + Poco::Net::Socket * getSocket() { return socket.get(); } + + /// Each time read from socket blocks and async_callback is set, it will be called. You can poll socket inside it. + void setAsyncCallback(AsyncCallback async_callback_) + { + async_callback = std::move(async_callback_); + if (in) + in->setAsyncCallback(std::move(async_callback)); + } + private: String host; UInt16 port; @@ -282,6 +290,8 @@ private: LoggerWrapper log_wrapper; + AsyncCallback async_callback = {}; + void connect(const ConnectionTimeouts & timeouts); void sendHello(); void receiveHello(); @@ -307,4 +317,20 @@ private: [[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const; }; +class AsyncCallbackSetter +{ +public: + AsyncCallbackSetter(Connection * connection_, AsyncCallback async_callback) : connection(connection_) + { + connection->setAsyncCallback(std::move(async_callback)); + } + + ~AsyncCallbackSetter() + { + connection->setAsyncCallback({}); + } +private: + Connection * connection; +}; + } diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp new file mode 100644 index 00000000000..2532035fabd --- /dev/null +++ b/src/Client/ConnectionEstablisher.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event DistributedConnectionMissingTable; + extern const Event DistributedConnectionStaleReplica; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; +} + +ConnectionEstablisher::ConnectionEstablisher( + IConnectionPool * pool_, + const ConnectionTimeouts * timeouts_, + const Settings * settings_, + Poco::Logger * log_, + const QualifiedTableName * table_to_check_) + : pool(pool_), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_), is_finished(false) +{ +} + +void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::string & fail_message) +{ + is_finished = false; + SCOPE_EXIT(is_finished = true); + try + { + result.entry = pool->get(*timeouts, settings, /* force_connected = */ false); + AsyncCallbackSetter async_setter(&*result.entry, std::move(async_callback)); + + UInt64 server_revision = 0; + if (table_to_check) + server_revision = result.entry->getServerRevision(*timeouts); + + if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) + { + result.entry->forceConnected(*timeouts); + result.is_usable = true; + result.is_up_to_date = true; + return; + } + + /// Only status of the remote table corresponding to the Distributed table is taken into account. + /// TODO: request status for joined tables also. + TablesStatusRequest status_request; + status_request.tables.emplace(*table_to_check); + + TablesStatusResponse status_response = result.entry->getTablesStatus(*timeouts, status_request); + auto table_status_it = status_response.table_states_by_id.find(*table_to_check); + if (table_status_it == status_response.table_states_by_id.end()) + { + const char * message_pattern = "There is no table {}.{} on server: {}"; + fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription()); + LOG_WARNING(log, fail_message); + ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); + return; + } + + result.is_usable = true; + + UInt64 max_allowed_delay = settings ? 
UInt64(settings->max_replica_delay_for_distributed_queries) : 0; + if (!max_allowed_delay) + { + result.is_up_to_date = true; + return; + } + + UInt32 delay = table_status_it->second.absolute_delay; + + if (delay < max_allowed_delay) + result.is_up_to_date = true; + else + { + result.is_up_to_date = false; + result.staleness = delay; + + LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay); + ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica); + } + } + catch (const Exception & e) + { + if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT + && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) + throw; + + fail_message = getCurrentExceptionMessage(/* with_stacktrace = */ false); + + if (!result.entry.isNull()) + { + result.entry->disconnect(); + result.reset(); + } + } +} + +#if defined(OS_LINUX) + +ConnectionEstablisherAsync::ConnectionEstablisherAsync( + IConnectionPool * pool_, + const ConnectionTimeouts * timeouts_, + const Settings * settings_, + Poco::Logger * log_, + const QualifiedTableName * table_to_check_) + : connection_establisher(pool_, timeouts_, settings_, log_, table_to_check_) +{ + epoll.add(receive_timeout.getDescriptor()); +} + +void ConnectionEstablisherAsync::Routine::ReadCallback::operator()(int fd, const Poco::Timespan & timeout, const std::string &) +{ + /// Check if it's the first time and we need to add socket fd to epoll. + if (connection_establisher_async.socket_fd == -1) + { + connection_establisher_async.epoll.add(fd); + connection_establisher_async.socket_fd = fd; + } + + connection_establisher_async.receive_timeout.setRelative(timeout); + fiber = std::move(fiber).resume(); + connection_establisher_async.receive_timeout.reset(); +} + +Fiber ConnectionEstablisherAsync::Routine::operator()(Fiber && sink) +{ + try + { + connection_establisher_async.connection_establisher.setAsyncCallback(ReadCallback{connection_establisher_async, sink}); + connection_establisher_async.connection_establisher.run(connection_establisher_async.result, connection_establisher_async.fail_message); + } + catch (const boost::context::detail::forced_unwind &) + { + /// This exception is thrown by fiber implementation in case if fiber is being deleted but hasn't exited + /// It should not be caught or it will segfault. + /// Other exceptions must be caught + throw; + } + catch (...) 
+ { + connection_establisher_async.exception = std::current_exception(); + } + + return std::move(sink); +} + +std::variant ConnectionEstablisherAsync::resume() +{ + if (!fiber_created) + { + reset(); + fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this}); + fiber_created = true; + } else if (!checkReceiveTimeout()) + return result; + + fiber = std::move(fiber).resume(); + + if (exception) + std::rethrow_exception(std::move(exception)); + + if (connection_establisher.isFinished()) + { + destroyFiber(); + return result; + } + + return epoll.getFileDescriptor(); +} + +bool ConnectionEstablisherAsync::checkReceiveTimeout() +{ + bool is_socket_ready = false; + bool is_receive_timeout_alarmed = false; + + epoll_event events[2]; + events[0].data.fd = events[1].data.fd = -1; + size_t ready_count = epoll.getManyReady(2, events, false); + for (size_t i = 0; i != ready_count; ++i) + { + if (events[i].data.fd == socket_fd) + is_socket_ready = true; + if (events[i].data.fd == receive_timeout.getDescriptor()) + is_receive_timeout_alarmed = true; + } + + if (is_receive_timeout_alarmed && !is_socket_ready) + { + destroyFiber(); + /// In not async case this exception would be thrown and caught in ConnectionEstablisher::run, + /// but in async case we process timeout outside and cannot throw exception. So, we just save fail message. + fail_message = "Timeout exceeded while reading from socket (" + result.entry->getDescription() + ")"; + epoll.remove(socket_fd); + resetResult(); + return false; + } + + return true; +} + +void ConnectionEstablisherAsync::cancel() +{ + destroyFiber(); + reset(); +} + +void ConnectionEstablisherAsync::reset() +{ + resetResult(); + fail_message.clear(); + socket_fd = -1; +} + +void ConnectionEstablisherAsync::resetResult() +{ + if (!result.entry.isNull()) + { + result.entry->disconnect(); + result.reset(); + } +} + +void ConnectionEstablisherAsync::destroyFiber() +{ + Fiber to_destroy = std::move(fiber); + fiber_created = false; +} + +#endif + +} diff --git a/src/Client/ConnectionEstablisher.h b/src/Client/ConnectionEstablisher.h new file mode 100644 index 00000000000..1096452ebce --- /dev/null +++ b/src/Client/ConnectionEstablisher.h @@ -0,0 +1,131 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Class for establishing connection to the replica. It supports setting up +/// an async callback that will be called when reading from socket blocks. +class ConnectionEstablisher +{ +public: + using TryResult = PoolWithFailoverBase::TryResult; + + ConnectionEstablisher(IConnectionPool * pool_, + const ConnectionTimeouts * timeouts_, + const Settings * settings_, + Poco::Logger * log, + const QualifiedTableName * table_to_check = nullptr); + + /// Establish connection and save it in result, write possible exception message in fail_message. + void run(TryResult & result, std::string & fail_message); + + /// Set async callback that will be called when reading from socket blocks. + void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } + + bool isFinished() const { return is_finished; } + +private: + IConnectionPool * pool; + const ConnectionTimeouts * timeouts; + const Settings * settings; + Poco::Logger * log; + const QualifiedTableName * table_to_check; + + bool is_finished; + AsyncCallback async_callback = {}; + +}; + +#if defined(OS_LINUX) + +/// Class for nonblocking establishing connection to the replica. 
+/// It runs establishing connection process in fiber and sets special +/// read callback which is called when reading from socket blocks. +/// When read callback is called, socket and receive timeout are added in epoll +/// and execution returns to the main program. +/// So, you can poll this epoll file descriptor to determine when to resume. +class ConnectionEstablisherAsync +{ +public: + using TryResult = PoolWithFailoverBase::TryResult; + + ConnectionEstablisherAsync(IConnectionPool * pool_, + const ConnectionTimeouts * timeouts_, + const Settings * settings_, + Poco::Logger * log_, + const QualifiedTableName * table_to_check = nullptr); + + /// Resume establishing connection. If the process was not finished, + /// return file descriptor (you can add it in epoll and poll it, + /// when this fd become ready, call resume again), + /// if the process was failed or finished, return it's result, + std::variant resume(); + + /// Cancel establishing connections. Fiber will be destroyed, + /// class will be set in initial stage. + void cancel(); + + TryResult getResult() const { return result; } + + const std::string & getFailMessage() const { return fail_message; } + +private: + /// When epoll file descriptor is ready, check if it's an expired timeout. + /// Return false if receive timeout expired and socket is not ready, return true otherwise. + bool checkReceiveTimeout(); + + struct Routine + { + ConnectionEstablisherAsync & connection_establisher_async; + + struct ReadCallback + { + ConnectionEstablisherAsync & connection_establisher_async; + Fiber & fiber; + + void operator()(int fd, const Poco::Timespan & timeout, const std::string &); + }; + + Fiber operator()(Fiber && sink); + }; + + void reset(); + + void resetResult(); + + void destroyFiber(); + + ConnectionEstablisher connection_establisher; + TryResult result; + std::string fail_message; + + Fiber fiber; + FiberStack fiber_stack; + + /// We use timer descriptor for checking socket receive timeout. + TimerDescriptor receive_timeout; + + /// In read callback we add socket file descriptor and timer descriptor with receive timeout + /// in epoll, so we can return epoll file descriptor outside for polling. + Epoll epoll; + int socket_fd = -1; + std::string socket_description; + + /// If and exception occurred in fiber resume, we save it and rethrow. + std::exception_ptr exception; + + bool fiber_created = false; +}; + +#endif + +} diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 1ca61dc8059..0c61b2bb49d 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -23,9 +24,6 @@ namespace DB namespace ErrorCodes { - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; extern const int LOGICAL_ERROR; } @@ -172,6 +170,43 @@ std::vector ConnectionPoolWithFailover::g return getManyImpl(settings, pool_mode, try_get_entry); } +ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings) +{ + size_t offset = 0; + if (settings) + offset = settings->load_balancing_first_offset % nested_pools.size(); + + GetPriorityFunc get_priority; + switch (settings ? 
LoadBalancing(settings->load_balancing) : default_load_balancing) + { + case LoadBalancing::NEAREST_HOSTNAME: + get_priority = [&](size_t i) { return hostname_differences[i]; }; + break; + case LoadBalancing::IN_ORDER: + get_priority = [](size_t i) { return i; }; + break; + case LoadBalancing::RANDOM: + break; + case LoadBalancing::FIRST_OR_RANDOM: + get_priority = [offset](size_t i) -> size_t { return i != offset; }; + break; + case LoadBalancing::ROUND_ROBIN: + if (last_used >= nested_pools.size()) + last_used = 0; + ++last_used; + /* Consider nested_pools.size() equals to 5 + * last_used = 1 -> get_priority: 0 1 2 3 4 + * last_used = 2 -> get_priority: 5 0 1 2 3 + * last_used = 3 -> get_priority: 5 4 0 1 2 + * ... + * */ + get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; + break; + } + + return get_priority; +} + std::vector ConnectionPoolWithFailover::getManyImpl( const Settings * settings, PoolMode pool_mode, @@ -194,36 +229,7 @@ std::vector ConnectionPoolWithFailover::g else throw DB::Exception("Unknown pool allocation mode", DB::ErrorCodes::LOGICAL_ERROR); - size_t offset = 0; - if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - GetPriorityFunc get_priority; - switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) - { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 5 0 1 2 3 - * last_used = 3 -> get_priority: 5 4 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; - } + GetPriorityFunc get_priority = makeGetPriorityFunc(settings); UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; @@ -241,77 +247,17 @@ ConnectionPoolWithFailover::tryGetEntry( const Settings * settings, const QualifiedTableName * table_to_check) { + ConnectionEstablisher connection_establisher(&pool, &timeouts, settings, log, table_to_check); TryResult result; - try - { - result.entry = pool.get(timeouts, settings, /* force_connected = */ false); - - UInt64 server_revision = 0; - if (table_to_check) - server_revision = result.entry->getServerRevision(timeouts); - - if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - { - result.entry->forceConnected(timeouts); - result.is_usable = true; - result.is_up_to_date = true; - return result; - } - - /// Only status of the remote table corresponding to the Distributed table is taken into account. - /// TODO: request status for joined tables also. 
- TablesStatusRequest status_request; - status_request.tables.emplace(*table_to_check); - - TablesStatusResponse status_response = result.entry->getTablesStatus(timeouts, status_request); - auto table_status_it = status_response.table_states_by_id.find(*table_to_check); - if (table_status_it == status_response.table_states_by_id.end()) - { - const char * message_pattern = "There is no table {}.{} on server: {}"; - fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription()); - LOG_WARNING(log, fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); - - return result; - } - - result.is_usable = true; - - UInt64 max_allowed_delay = settings ? UInt64(settings->max_replica_delay_for_distributed_queries) : 0; - if (!max_allowed_delay) - { - result.is_up_to_date = true; - return result; - } - - UInt32 delay = table_status_it->second.absolute_delay; - - if (delay < max_allowed_delay) - result.is_up_to_date = true; - else - { - result.is_up_to_date = false; - result.staleness = delay; - - LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay); - ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica); - } - } - catch (const Exception & e) - { - if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT - && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) - throw; - - fail_message = getCurrentExceptionMessage(/* with_stacktrace = */ false); - - if (!result.entry.isNull()) - { - result.entry->disconnect(); - result.reset(); - } - } + connection_establisher.run(result, fail_message); return result; } +std::vector ConnectionPoolWithFailover::getShuffledPools(const Settings * settings) +{ + GetPriorityFunc get_priority = makeGetPriorityFunc(settings); + UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; + return Base::getShuffledPools(max_ignored_errors, get_priority); +} + } diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 7d5f713f6a9..023ef863bdf 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -80,6 +80,15 @@ public: using Status = std::vector; Status getStatus() const; + std::vector getShuffledPools(const Settings * settings); + + size_t getMaxErrorCup() const { return Base::max_error_cap; } + + void updateSharedError(std::vector & shuffled_pools) + { + Base::updateSharedErrorCounts(shuffled_pools); + } + private: /// Get the values of relevant settings and call Base::getMany() std::vector getManyImpl( @@ -97,6 +106,8 @@ private: const Settings * settings, const QualifiedTableName * table_to_check = nullptr); + GetPriorityFunc makeGetPriorityFunc(const Settings * settings); + private: std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. size_t last_used = 0; /// Last used for round_robin policy. 
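A reviewer-side note on the `ROUND_ROBIN` branch of `makeGetPriorityFunc` above: its behaviour is easier to see from a concrete run. The following is a minimal standalone sketch, not part of the patch; `pool_size` and `last_used` are hypothetical stand-ins for `nested_pools.size()` and the pool's `last_used` member, and the lambda body is copied from that branch. A smaller returned value means a more preferred replica.

``` cpp
#include <cstddef>
#include <iostream>

int main()
{
    /// Stand-in for nested_pools.size(); the real value comes from the cluster configuration.
    const size_t pool_size = 5;

    /// Stand-in for ConnectionPoolWithFailover::last_used after it has been advanced.
    for (size_t last_used = 1; last_used <= 3; ++last_used)
    {
        /// Same lambda as in the ROUND_ROBIN branch: priority 0 (most preferred)
        /// rotates to the next pool index on every call; pools after that index are
        /// preferred in increasing order, pools before it get the largest values.
        auto get_priority = [&](size_t i)
        {
            ++i;
            return i < last_used ? pool_size - i : i - last_used;
        };

        std::cout << "last_used = " << last_used << ':';
        for (size_t i = 0; i < pool_size; ++i)
            std::cout << ' ' << get_priority(i);
        std::cout << '\n';
    }
}
```

This prints `0 1 2 3 4`, `4 0 1 2 3` and `4 3 0 1 2` for `last_used` = 1, 2, 3. The relative order matches the illustration in the comment inside the branch, with the wrapped-around entries coming out one lower than the `5 ...` values shown there.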
diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp new file mode 100644 index 00000000000..41397df2e79 --- /dev/null +++ b/src/Client/HedgedConnections.cpp @@ -0,0 +1,524 @@ +#if defined(OS_LINUX) + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int MISMATCH_REPLICAS_DATA_SOURCES; + extern const int LOGICAL_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int ALL_CONNECTION_TRIES_FAILED; +} + +HedgedConnections::HedgedConnections( + const ConnectionPoolWithFailoverPtr & pool_, + const Settings & settings_, + const ConnectionTimeouts & timeouts_, + const ThrottlerPtr & throttler_, + PoolMode pool_mode, + std::shared_ptr table_to_check_) + : hedged_connections_factory(pool_, &settings_, timeouts_, table_to_check_) + , settings(settings_) + , throttler(throttler_) +{ + std::vector connections = hedged_connections_factory.getManyConnections(pool_mode); + + if (connections.empty()) + return; + + offset_states.reserve(connections.size()); + for (size_t i = 0; i != connections.size(); ++i) + { + offset_states.emplace_back(); + offset_states[i].replicas.emplace_back(connections[i]); + offset_states[i].active_connection_count = 1; + + ReplicaState & replica = offset_states[i].replicas.back(); + replica.connection->setThrottler(throttler_); + + epoll.add(replica.packet_receiver->getFileDescriptor()); + fd_to_replica_location[replica.packet_receiver->getFileDescriptor()] = ReplicaLocation{i, 0}; + + epoll.add(replica.change_replica_timeout.getDescriptor()); + timeout_fd_to_replica_location[replica.change_replica_timeout.getDescriptor()] = ReplicaLocation{i, 0}; + } + + active_connection_count = connections.size(); + offsets_with_disabled_changing_replica = 0; + pipeline_for_new_replicas.add([throttler_](ReplicaState & replica_) { replica_.connection->setThrottler(throttler_); }); +} + +void HedgedConnections::Pipeline::add(std::function send_function) +{ + pipeline.push_back(send_function); +} + +void HedgedConnections::Pipeline::run(ReplicaState & replica) +{ + for (auto & send_func : pipeline) + send_func(replica); +} + +void HedgedConnections::sendScalarsData(Scalars & data) +{ + std::lock_guard lock(cancel_mutex); + + if (!sent_query) + throw Exception("Cannot send scalars data: query not yet sent.", ErrorCodes::LOGICAL_ERROR); + + auto send_scalars_data = [&data](ReplicaState & replica) { replica.connection->sendScalarsData(data); }; + + for (auto & offset_state : offset_states) + for (auto & replica : offset_state.replicas) + if (replica.connection) + send_scalars_data(replica); + + pipeline_for_new_replicas.add(send_scalars_data); +} + +void HedgedConnections::sendExternalTablesData(std::vector & data) +{ + std::lock_guard lock(cancel_mutex); + + if (!sent_query) + throw Exception("Cannot send external tables data: query not yet sent.", ErrorCodes::LOGICAL_ERROR); + + if (data.size() != size()) + throw Exception("Mismatch between replicas and data sources", ErrorCodes::MISMATCH_REPLICAS_DATA_SOURCES); + + auto send_external_tables_data = [&data](ReplicaState & replica) { replica.connection->sendExternalTablesData(data[0]); }; + + for (auto & offset_state : offset_states) + for (auto & replica : offset_state.replicas) + if (replica.connection) + send_external_tables_data(replica); + + pipeline_for_new_replicas.add(send_external_tables_data); +} + +void HedgedConnections::sendIgnoredPartUUIDs(const std::vector & uuids) +{ + std::lock_guard lock(cancel_mutex); + + if (sent_query) + throw Exception("Cannot send uuids after query 
is sent.", ErrorCodes::LOGICAL_ERROR); + + auto send_ignored_part_uuids = [&uuids](ReplicaState & replica) { replica.connection->sendIgnoredPartUUIDs(uuids); }; + + for (auto & offset_state : offset_states) + for (auto & replica : offset_state.replicas) + if (replica.connection) + send_ignored_part_uuids(replica); + + pipeline_for_new_replicas.add(send_ignored_part_uuids); +} + +void HedgedConnections::sendQuery( + const ConnectionTimeouts & timeouts, + const String & query, + const String & query_id, + UInt64 stage, + const ClientInfo & client_info, + bool with_pending_data) +{ + std::lock_guard lock(cancel_mutex); + + if (sent_query) + throw Exception("Query already sent.", ErrorCodes::LOGICAL_ERROR); + + for (auto & offset_state : offset_states) + { + for (auto & replica : offset_state.replicas) + { + if (replica.connection->getServerRevision(timeouts) < DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD) + { + disable_two_level_aggregation = true; + break; + } + } + if (disable_two_level_aggregation) + break; + } + + if (!disable_two_level_aggregation) + { + /// Tell hedged_connections_factory to skip replicas that doesn't support two-level aggregation. + hedged_connections_factory.skipReplicasWithTwoLevelAggregationIncompatibility(); + } + + auto send_query = [this, timeouts, query, query_id, stage, client_info, with_pending_data](ReplicaState & replica) + { + Settings modified_settings = settings; + + if (disable_two_level_aggregation) + { + /// Disable two-level aggregation due to version incompatibility. + modified_settings.group_by_two_level_threshold = 0; + modified_settings.group_by_two_level_threshold_bytes = 0; + } + + if (offset_states.size() > 1) + { + modified_settings.parallel_replicas_count = offset_states.size(); + modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; + } + + replica.connection->sendQuery(timeouts, query, query_id, stage, &modified_settings, &client_info, with_pending_data); + replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); + }; + + for (auto & offset_status : offset_states) + for (auto & replica : offset_status.replicas) + send_query(replica); + + pipeline_for_new_replicas.add(send_query); + sent_query = true; +} + +void HedgedConnections::disconnect() +{ + std::lock_guard lock(cancel_mutex); + + for (auto & offset_status : offset_states) + for (auto & replica : offset_status.replicas) + if (replica.connection) + finishProcessReplica(replica, true); + + if (hedged_connections_factory.hasEventsInProcess()) + { + if (hedged_connections_factory.numberOfProcessingReplicas() > 0) + epoll.remove(hedged_connections_factory.getFileDescriptor()); + + hedged_connections_factory.stopChoosingReplicas(); + } +} + +std::string HedgedConnections::dumpAddresses() const +{ + std::lock_guard lock(cancel_mutex); + + std::string addresses; + bool is_first = true; + + for (const auto & offset_state : offset_states) + { + for (const auto & replica : offset_state.replicas) + { + if (replica.connection) + { + addresses += (is_first ? "" : "; ") + replica.connection->getDescription(); + is_first = false; + } + } + } + + return addresses; +} + +void HedgedConnections::sendCancel() +{ + std::lock_guard lock(cancel_mutex); + + if (!sent_query || cancelled) + throw Exception("Cannot cancel. 
Either no query sent or already cancelled.", ErrorCodes::LOGICAL_ERROR); + + for (auto & offset_status : offset_states) + for (auto & replica : offset_status.replicas) + if (replica.connection) + replica.connection->sendCancel(); + + cancelled = true; +} + +Packet HedgedConnections::drain() +{ + std::lock_guard lock(cancel_mutex); + + if (!cancelled) + throw Exception("Cannot drain connections: cancel first.", ErrorCodes::LOGICAL_ERROR); + + Packet res; + res.type = Protocol::Server::EndOfStream; + + while (!epoll.empty()) + { + ReplicaLocation location = getReadyReplicaLocation(); + Packet packet = receivePacketFromReplica(location); + switch (packet.type) + { + case Protocol::Server::PartUUIDs: + case Protocol::Server::Data: + case Protocol::Server::Progress: + case Protocol::Server::ProfileInfo: + case Protocol::Server::Totals: + case Protocol::Server::Extremes: + case Protocol::Server::EndOfStream: + break; + + case Protocol::Server::Exception: + default: + /// If we receive an exception or an unknown packet, we save it. + res = std::move(packet); + break; + } + } + + return res; +} + +Packet HedgedConnections::receivePacket() +{ + std::lock_guard lock(cancel_mutex); + return receivePacketUnlocked({}); +} + +Packet HedgedConnections::receivePacketUnlocked(AsyncCallback async_callback) +{ + if (!sent_query) + throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR); + if (!hasActiveConnections()) + throw Exception("No more packets are available.", ErrorCodes::LOGICAL_ERROR); + + if (epoll.empty()) + throw Exception("No pending events in epoll.", ErrorCodes::LOGICAL_ERROR); + + ReplicaLocation location = getReadyReplicaLocation(std::move(async_callback)); + return receivePacketFromReplica(location); +} + +HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(AsyncCallback async_callback) +{ + /// Firstly, resume replica with the last received packet if it has pending data. + if (replica_with_last_received_packet) + { + ReplicaLocation location = replica_with_last_received_packet.value(); + replica_with_last_received_packet.reset(); + if (offset_states[location.offset].replicas[location.index].connection->hasReadPendingData() && resumePacketReceiver(location)) + return location; + } + + int event_fd; + while (true) + { + /// Get ready file descriptor from epoll and process it. 
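The loop below waits on the shared epoll descriptor and routes every ready file descriptor to its handler: a packet receiver, a change-replica timeout, or the connections factory. As a standalone illustration of that dispatch pattern (a sketch with a hypothetical handler map and function name, not code from this patch):

#include <sys/epoll.h>
#include <functional>
#include <unordered_map>

/// Wait for one ready descriptor and invoke the callback registered for it,
/// the same routing the loop below does with fd_to_replica_location,
/// timeout_fd_to_replica_location and the factory descriptor.
int dispatchOneEvent(int epoll_fd, const std::unordered_map<int, std::function<void()>> & handlers)
{
    epoll_event event{};
    int ready = epoll_wait(epoll_fd, &event, 1, /* timeout_ms = */ -1);
    if (ready <= 0)
        return -1;

    auto it = handlers.find(event.data.fd);
    if (it != handlers.end())
        it->second();   /// resume a packet receiver, expire a timeout, or poll the factory

    return event.data.fd;
}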
+ event_fd = getReadyFileDescriptor(async_callback); + + if (event_fd == hedged_connections_factory.getFileDescriptor()) + checkNewReplica(); + else if (fd_to_replica_location.contains(event_fd)) + { + ReplicaLocation location = fd_to_replica_location[event_fd]; + if (resumePacketReceiver(location)) + return location; + } + else if (timeout_fd_to_replica_location.contains(event_fd)) + { + ReplicaLocation location = timeout_fd_to_replica_location[event_fd]; + offset_states[location.offset].replicas[location.index].change_replica_timeout.reset(); + offset_states[location.offset].replicas[location.index].is_change_replica_timeout_expired = true; + offset_states[location.offset].next_replica_in_process = true; + offsets_queue.push(location.offset); + startNewReplica(); + } + else + throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR); + } +}; + +bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLocation & location) +{ + ReplicaState & replica_state = offset_states[location.offset].replicas[location.index]; + auto res = replica_state.packet_receiver->resume(); + + if (std::holds_alternative(res)) + { + last_received_packet = std::move(std::get(res)); + return true; + } + else if (std::holds_alternative(res)) + { + finishProcessReplica(replica_state, true); + + /// Check if there is no more active connections with the same offset and there is no new replica in process. + if (offset_states[location.offset].active_connection_count == 0 && !offset_states[location.offset].next_replica_in_process) + throw NetException("Receive timeout expired", ErrorCodes::SOCKET_TIMEOUT); + } + + return false; +} + +int HedgedConnections::getReadyFileDescriptor(AsyncCallback async_callback) +{ + epoll_event event; + event.data.fd = -1; + size_t events_count = 0; + while (events_count == 0) + { + events_count = epoll.getManyReady(1, &event, false); + if (!events_count && async_callback) + async_callback(epoll.getFileDescriptor(), 0, epoll.getDescription()); + } + return event.data.fd; +} + +Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & replica_location) +{ + ReplicaState & replica = offset_states[replica_location.offset].replicas[replica_location.index]; + Packet packet = std::move(last_received_packet); + switch (packet.type) + { + case Protocol::Server::Data: + /// If we received the first not empty data packet and still can change replica, + /// disable changing replica with this offset. + if (offset_states[replica_location.offset].can_change_replica && packet.block.rows() > 0) + disableChangingReplica(replica_location); + replica_with_last_received_packet = replica_location; + break; + case Protocol::Server::Progress: + /// Check if we have made some progress and still can change replica. + if (offset_states[replica_location.offset].can_change_replica && packet.progress.read_bytes > 0) + { + /// If we are allowed to change replica until the first data packet, + /// just restart timeout (if it hasn't expired yet). Otherwise disable changing replica with this offset. 
+ if (settings.allow_changing_replica_until_first_data_packet && !replica.is_change_replica_timeout_expired) + replica.change_replica_timeout.setRelative(hedged_connections_factory.getConnectionTimeouts().receive_data_timeout); + else + disableChangingReplica(replica_location); + } + replica_with_last_received_packet = replica_location; + break; + case Protocol::Server::PartUUIDs: + case Protocol::Server::ProfileInfo: + case Protocol::Server::Totals: + case Protocol::Server::Extremes: + case Protocol::Server::Log: + replica_with_last_received_packet = replica_location; + break; + + case Protocol::Server::EndOfStream: + finishProcessReplica(replica, false); + break; + + case Protocol::Server::Exception: + default: + finishProcessReplica(replica, true); + break; + } + + return packet; +} + +void HedgedConnections::disableChangingReplica(const ReplicaLocation & replica_location) +{ + /// Stop working with replicas, that are responsible for the same offset. + OffsetState & offset_state = offset_states[replica_location.offset]; + offset_state.replicas[replica_location.index].change_replica_timeout.reset(); + ++offsets_with_disabled_changing_replica; + offset_state.can_change_replica = false; + + for (size_t i = 0; i != offset_state.replicas.size(); ++i) + { + if (i != replica_location.index && offset_state.replicas[i].connection) + { + offset_state.replicas[i].connection->sendCancel(); + finishProcessReplica(offset_state.replicas[i], true); + } + } + + /// If we disabled changing replica with all offsets, we need to stop choosing new replicas. + if (hedged_connections_factory.hasEventsInProcess() && offsets_with_disabled_changing_replica == offset_states.size()) + { + if (hedged_connections_factory.numberOfProcessingReplicas() > 0) + epoll.remove(hedged_connections_factory.getFileDescriptor()); + hedged_connections_factory.stopChoosingReplicas(); + } +} + +void HedgedConnections::startNewReplica() +{ + Connection * connection = nullptr; + HedgedConnectionsFactory::State state = hedged_connections_factory.startNewConnection(connection); + + /// Check if we need to add hedged_connections_factory file descriptor to epoll. + if (state == HedgedConnectionsFactory::State::NOT_READY && hedged_connections_factory.numberOfProcessingReplicas() == 1) + epoll.add(hedged_connections_factory.getFileDescriptor()); + + processNewReplicaState(state, connection); +} + +void HedgedConnections::checkNewReplica() +{ + Connection * connection = nullptr; + HedgedConnectionsFactory::State state = hedged_connections_factory.waitForReadyConnections(connection); + + processNewReplicaState(state, connection); + + /// Check if we don't need to listen hedged_connections_factory file descriptor in epoll anymore. 
+ if (hedged_connections_factory.numberOfProcessingReplicas() == 0) + epoll.remove(hedged_connections_factory.getFileDescriptor()); +} + +void HedgedConnections::processNewReplicaState(HedgedConnectionsFactory::State state, Connection * connection) +{ + switch (state) + { + case HedgedConnectionsFactory::State::READY: + { + size_t offset = offsets_queue.front(); + offsets_queue.pop(); + + offset_states[offset].replicas.emplace_back(connection); + ++offset_states[offset].active_connection_count; + offset_states[offset].next_replica_in_process = false; + ++active_connection_count; + + ReplicaState & replica = offset_states[offset].replicas.back(); + epoll.add(replica.packet_receiver->getFileDescriptor()); + fd_to_replica_location[replica.packet_receiver->getFileDescriptor()] = ReplicaLocation{offset, offset_states[offset].replicas.size() - 1}; + epoll.add(replica.change_replica_timeout.getDescriptor()); + timeout_fd_to_replica_location[replica.change_replica_timeout.getDescriptor()] = ReplicaLocation{offset, offset_states[offset].replicas.size() - 1}; + + pipeline_for_new_replicas.run(replica); + break; + } + case HedgedConnectionsFactory::State::CANNOT_CHOOSE: + { + while (!offsets_queue.empty()) + { + /// Check if there is no active replica with needed offsets. + if (offset_states[offsets_queue.front()].active_connection_count == 0) + throw Exception("Cannot find enough connections to replicas", ErrorCodes::ALL_CONNECTION_TRIES_FAILED); + offset_states[offsets_queue.front()].next_replica_in_process = false; + offsets_queue.pop(); + } + break; + } + case HedgedConnectionsFactory::State::NOT_READY: + break; + } +} + +void HedgedConnections::finishProcessReplica(ReplicaState & replica, bool disconnect) +{ + replica.packet_receiver->cancel(); + replica.change_replica_timeout.reset(); + + epoll.remove(replica.packet_receiver->getFileDescriptor()); + --offset_states[fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset].active_connection_count; + fd_to_replica_location.erase(replica.packet_receiver->getFileDescriptor()); + + epoll.remove(replica.change_replica_timeout.getDescriptor()); + timeout_fd_to_replica_location.erase(replica.change_replica_timeout.getDescriptor()); + + --active_connection_count; + + if (disconnect) + replica.connection->disconnect(); + replica.connection = nullptr; +} + +} +#endif diff --git a/src/Client/HedgedConnections.h b/src/Client/HedgedConnections.h new file mode 100644 index 00000000000..f1675108349 --- /dev/null +++ b/src/Client/HedgedConnections.h @@ -0,0 +1,189 @@ +#pragma once +#if defined(OS_LINUX) + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +/** To receive data from multiple replicas (connections) from one shard asynchronously. + * The principe of Hedged Connections is used to reduce tail latency: + * if we don't receive data from replica and there is no progress in query execution + * for a long time, we try to get new replica and send query to it, + * without cancelling working with previous replica. This class + * supports all functionality that MultipleConnections has. 
+ */ +class HedgedConnections : public IConnections +{ +public: + using PacketReceiverPtr = std::unique_ptr; + struct ReplicaState + { + explicit ReplicaState(Connection * connection_) : connection(connection_), packet_receiver(std::make_unique(connection_)) + { + } + + Connection * connection = nullptr; + PacketReceiverPtr packet_receiver; + TimerDescriptor change_replica_timeout; + bool is_change_replica_timeout_expired = false; + }; + + struct OffsetState + { + /// Replicas with the same offset. + std::vector replicas; + /// An amount of active replicas. When can_change_replica is false, + /// active_connection_count is always <= 1 (because we stopped working with + /// other replicas with the same offset) + size_t active_connection_count = 0; + bool can_change_replica = true; + + /// This flag is true when this offset is in queue for + /// new replicas. It's needed to process receive timeout + /// (throw an exception when receive timeout expired and there is no + /// new replica in process) + bool next_replica_in_process = false; + }; + + /// We process events in epoll, so we need to determine replica by it's + /// file descriptor. We store map fd -> replica location. To determine + /// where replica is, we need a replica offset + /// (the same as parallel_replica_offset), and index, which is needed because + /// we can have many replicas with same offset (when receive_data_timeout has expired). + struct ReplicaLocation + { + size_t offset; + size_t index; + }; + + HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_, + const Settings & settings_, + const ConnectionTimeouts & timeouts_, + const ThrottlerPtr & throttler, + PoolMode pool_mode, + std::shared_ptr table_to_check_ = nullptr); + + void sendScalarsData(Scalars & data) override; + + void sendExternalTablesData(std::vector & data) override; + + void sendQuery( + const ConnectionTimeouts & timeouts, + const String & query, + const String & query_id, + UInt64 stage, + const ClientInfo & client_info, + bool with_pending_data) override; + + Packet receivePacket() override; + + Packet receivePacketUnlocked(AsyncCallback async_callback) override; + + void disconnect() override; + + void sendCancel() override; + + void sendIgnoredPartUUIDs(const std::vector & uuids) override; + + Packet drain() override; + + std::string dumpAddresses() const override; + + size_t size() const override { return offset_states.size(); } + + bool hasActiveConnections() const override { return active_connection_count > 0; } + +private: + /// If we don't receive data from replica and there is no progress in query + /// execution for receive_data_timeout, we are trying to get new + /// replica and send query to it. Beside sending query, there are some + /// additional actions like sendScalarsData or sendExternalTablesData and we need + /// to perform these actions in the same order on the new replica. So, we will + /// save actions with replicas in pipeline to perform them on the new replicas. 
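A minimal standalone sketch of this record-and-replay idea, using a simplified stand-in type (ActionLog and Replica are hypothetical names for illustration only; the real Pipeline declared below stores std::function callbacks over ReplicaState):

#include <functional>
#include <string>
#include <vector>

struct Replica { std::string name; };   /// hypothetical stand-in for ReplicaState

class ActionLog
{
public:
    /// Perform the action on all current replicas and remember it for later joiners.
    void addAndRun(std::vector<Replica> & replicas, std::function<void(Replica &)> action)
    {
        for (auto & replica : replicas)
            action(replica);
        actions.push_back(std::move(action));
    }

    /// A replica that joins later catches up by replaying everything sent so far.
    void replayOn(Replica & late_replica)
    {
        for (const auto & action : actions)
            action(late_replica);
    }

private:
    std::vector<std::function<void(Replica &)>> actions;
};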
+ class Pipeline + { + public: + void add(std::function send_function); + + void run(ReplicaState & replica); + private: + std::vector> pipeline; + }; + + Packet receivePacketFromReplica(const ReplicaLocation & replica_location); + + ReplicaLocation getReadyReplicaLocation(AsyncCallback async_callback = {}); + + bool resumePacketReceiver(const ReplicaLocation & replica_location); + + void disableChangingReplica(const ReplicaLocation & replica_location); + + void startNewReplica(); + + void checkNewReplica(); + + void processNewReplicaState(HedgedConnectionsFactory::State state, Connection * connection); + + void finishProcessReplica(ReplicaState & replica, bool disconnect); + + int getReadyFileDescriptor(AsyncCallback async_callback = {}); + + HedgedConnectionsFactory hedged_connections_factory; + + /// All replicas in offset_states[offset] are responsible for processing the query + /// with the setting parallel_replica_offset = offset. In common situations + /// replica_states[offset].replicas.size() = 1 (like in MultiplexedConnections). + std::vector offset_states; + + /// Map socket file descriptor to replica location (its offset and index in OffsetState.replicas). + std::unordered_map fd_to_replica_location; + + /// Map receive data timeout file descriptor to replica location. + std::unordered_map timeout_fd_to_replica_location; + + /// A queue of offsets for new replicas. When we get RECEIVE_DATA_TIMEOUT from + /// the replica, we push its offset to this queue and start trying to get + /// a new replica. + std::queue offsets_queue; + + /// The current number of valid connections to the replicas of this shard. + size_t active_connection_count; + + /// We count offsets in which we can't change replica anymore, + /// it's needed to cancel choosing new replicas when we + /// disabled replica changing in all offsets. + size_t offsets_with_disabled_changing_replica; + + Pipeline pipeline_for_new_replicas; + + /// New replica may not support two-level aggregation due to version incompatibility. + /// If we didn't disable it, we need to skip this replica. + bool disable_two_level_aggregation = false; + + /// We will save the replica with the last received packet + /// (except cases when packet type is EndOfStream or Exception) + /// to resume its packet receiver when a new packet is needed. 
+ std::optional replica_with_last_received_packet; + + Packet last_received_packet; + + Epoll epoll; + const Settings & settings; + ThrottlerPtr throttler; + bool sent_query = false; + bool cancelled = false; + + mutable std::mutex cancel_mutex; +}; + +} +#endif diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp new file mode 100644 index 00000000000..a7f3bfb04d7 --- /dev/null +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -0,0 +1,387 @@ +#if defined(OS_LINUX) + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int ALL_REPLICAS_ARE_STALE; + extern const int LOGICAL_ERROR; +} + +HedgedConnectionsFactory::HedgedConnectionsFactory( + const ConnectionPoolWithFailoverPtr & pool_, + const Settings * settings_, + const ConnectionTimeouts & timeouts_, + std::shared_ptr table_to_check_) + : pool(pool_), settings(settings_), timeouts(timeouts_), table_to_check(table_to_check_), log(&Poco::Logger::get("HedgedConnectionsFactory")) +{ + shuffled_pools = pool->getShuffledPools(settings); + for (size_t i = 0; i != shuffled_pools.size(); ++i) + replicas.emplace_back(ConnectionEstablisherAsync(shuffled_pools[i].pool, &timeouts, settings, log, table_to_check.get())); + + max_tries + = (settings ? size_t{settings->connections_with_failover_max_tries} : size_t{DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES}); + + fallback_to_stale_replicas = settings && settings->fallback_to_stale_replicas_for_distributed_queries; +} + +HedgedConnectionsFactory::~HedgedConnectionsFactory() +{ + pool->updateSharedError(shuffled_pools); +} + +std::vector HedgedConnectionsFactory::getManyConnections(PoolMode pool_mode) +{ + size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1; + + size_t max_entries; + switch (pool_mode) + { + case PoolMode::GET_ALL: + { + min_entries = shuffled_pools.size(); + max_entries = shuffled_pools.size(); + break; + } + case PoolMode::GET_ONE: + { + max_entries = 1; + break; + } + case PoolMode::GET_MANY: + { + max_entries = settings ? size_t(settings->max_parallel_replicas) : 1; + break; + } + } + + std::vector connections; + connections.reserve(max_entries); + Connection * connection = nullptr; + + /// Try to start establishing connections with max_entries replicas. + for (size_t i = 0; i != max_entries; ++i) + { + ++requested_connections_count; + State state = startNewConnectionImpl(connection); + if (state == State::READY) + connections.push_back(connection); + if (state == State::CANNOT_CHOOSE) + break; + } + + /// Process connections until we get enough READY connections + /// (work asynchronously with all connections we started). + /// TODO: when we get GET_ALL mode we can start reading packets from ready + /// TODO: connection as soon as we got it, not even waiting for the others. + while (connections.size() < max_entries) + { + /// Set blocking = true to avoid busy-waiting here. + auto state = waitForReadyConnectionsImpl(/*blocking = */true, connection); + if (state == State::READY) + connections.push_back(connection); + else if (state == State::CANNOT_CHOOSE) + { + if (connections.size() >= min_entries) + break; + + /// Determine the reason of not enough replicas. + if (!fallback_to_stale_replicas && up_to_date_count < min_entries) + throw Exception( + "Could not find enough connections to up-to-date replicas. 
Got: " + std::to_string(connections.size()) + + ", needed: " + std::to_string(min_entries), + DB::ErrorCodes::ALL_REPLICAS_ARE_STALE); + if (usable_count < min_entries) + throw NetException( + "All connection tries failed. Log: \n\n" + fail_messages + "\n", + DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED); + + throw Exception("Unknown reason of not enough replicas.", ErrorCodes::LOGICAL_ERROR); + } + } + + return connections; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::startNewConnection(Connection *& connection_out) +{ + ++requested_connections_count; + State state = startNewConnectionImpl(connection_out); + /// If we cannot start new connection but there are connections in epoll, return NOT_READY. + if (state == State::CANNOT_CHOOSE && !epoll.empty()) + state = State::NOT_READY; + + return state; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::waitForReadyConnections(Connection *& connection_out) +{ + return waitForReadyConnectionsImpl(false, connection_out); +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::waitForReadyConnectionsImpl(bool blocking, Connection *& connection_out) +{ + State state = processEpollEvents(blocking, connection_out); + if (state != State::CANNOT_CHOOSE) + return state; + + /// We reach this point only if there was no free up to date replica. + /// We will try to use usable replica. + + /// Check if we are not allowed to use usable replicas or there is no even a free usable replica. + if (!fallback_to_stale_replicas) + return State::CANNOT_CHOOSE; + + return setBestUsableReplica(connection_out); +} + +int HedgedConnectionsFactory::getNextIndex() +{ + /// Check if there is no free replica. + if (entries_count + replicas_in_process_count + failed_pools_count >= shuffled_pools.size()) + return -1; + + /// Check if it's the first time. + if (last_used_index == -1) + { + last_used_index = 0; + return 0; + } + + bool finish = false; + int next_index = last_used_index; + while (!finish) + { + next_index = (next_index + 1) % shuffled_pools.size(); + + /// Check if we can try this replica. + if (replicas[next_index].connection_establisher.getResult().entry.isNull() + && (max_tries == 0 || shuffled_pools[next_index].error_count < max_tries)) + finish = true; + + /// If we made a complete round, there is no replica to connect. + else if (next_index == last_used_index) + return -1; + } + + last_used_index = next_index; + return next_index; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::startNewConnectionImpl(Connection *& connection_out) +{ + int index; + State state; + do + { + index = getNextIndex(); + if (index == -1) + return State::CANNOT_CHOOSE; + + state = resumeConnectionEstablisher(index, connection_out); + } + while (state == State::CANNOT_CHOOSE); + + return state; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::processEpollEvents(bool blocking, Connection *& connection_out) +{ + int event_fd; + while (!epoll.empty()) + { + event_fd = getReadyFileDescriptor(blocking); + + if (event_fd == -1) + return State::NOT_READY; + + if (fd_to_replica_index.contains(event_fd)) + { + int index = fd_to_replica_index[event_fd]; + State state = resumeConnectionEstablisher(index, connection_out); + if (state == State::NOT_READY) + continue; + + /// Connection establishing not in process now, remove all + /// information about it from epoll. 
+ removeReplicaFromEpoll(index, event_fd); + + if (state == State::READY) + return state; + } + else if (timeout_fd_to_replica_index.contains(event_fd)) + replicas[timeout_fd_to_replica_index[event_fd]].change_replica_timeout.reset(); + else + throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR); + + /// We reach this point only if we need to start new connection + /// (Special timeout expired or one of the previous connections failed). + /// Return only if replica is ready. + if (startNewConnectionImpl(connection_out) == State::READY) + return State::READY; + } + + return State::CANNOT_CHOOSE; +} + +int HedgedConnectionsFactory::getReadyFileDescriptor(bool blocking) +{ + epoll_event event; + event.data.fd = -1; + epoll.getManyReady(1, &event, blocking); + return event.data.fd; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::resumeConnectionEstablisher(int index, Connection *& connection_out) +{ + auto res = replicas[index].connection_establisher.resume(); + + if (std::holds_alternative(res)) + return processFinishedConnection(index, std::get(res), connection_out); + + int fd = std::get(res); + if (!fd_to_replica_index.contains(fd)) + addNewReplicaToEpoll(index, fd); + + return State::NOT_READY; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnection(int index, TryResult result, Connection *& connection_out) +{ + const std::string & fail_message = replicas[index].connection_establisher.getFailMessage(); + if (!fail_message.empty()) + fail_messages += fail_message + "\n"; + + if (!result.entry.isNull()) + { + ++entries_count; + + if (result.is_usable) + { + ++usable_count; + if (result.is_up_to_date) + { + ++up_to_date_count; + if (!skip_replicas_with_two_level_aggregation_incompatibility || !isTwoLevelAggregationIncompatible(&*result.entry)) + { + replicas[index].is_ready = true; + ++ready_replicas_count; + connection_out = &*result.entry; + return State::READY; + } + } + } + } + else + { + ShuffledPool & shuffled_pool = shuffled_pools[index]; + LOG_WARNING( + log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); + + shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); + + if (shuffled_pool.error_count >= max_tries) + { + ++failed_pools_count; + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); + } + } + + return State::CANNOT_CHOOSE; +} + +void HedgedConnectionsFactory::stopChoosingReplicas() +{ + for (auto & [fd, index] : fd_to_replica_index) + { + --replicas_in_process_count; + epoll.remove(fd); + replicas[index].connection_establisher.cancel(); + } + + for (auto & [timeout_fd, index] : timeout_fd_to_replica_index) + { + replicas[index].change_replica_timeout.reset(); + epoll.remove(timeout_fd); + } + + fd_to_replica_index.clear(); + timeout_fd_to_replica_index.clear(); +} + +void HedgedConnectionsFactory::addNewReplicaToEpoll(int index, int fd) +{ + ++replicas_in_process_count; + epoll.add(fd); + fd_to_replica_index[fd] = index; + + /// Add timeout for changing replica. 
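The change_replica_timeout armed just below is a TimerDescriptor registered in the same epoll as the sockets. Assuming TimerDescriptor wraps a Linux timerfd (an assumption about its implementation, not something this patch shows), the underlying mechanism looks roughly like this sketch:

#include <sys/timerfd.h>
#include <unistd.h>

/// Create a one-shot relative timeout that becomes readable, and therefore
/// epoll-ready, once it expires. Hypothetical helper, for illustration only.
int makeRelativeTimeoutFd(long seconds, long nanoseconds)
{
    int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (fd == -1)
        return -1;

    itimerspec spec{};                    /// it_interval stays zero: one-shot timer
    spec.it_value.tv_sec = seconds;
    spec.it_value.tv_nsec = nanoseconds;
    if (timerfd_settime(fd, 0, &spec, nullptr) == -1)
    {
        close(fd);
        return -1;
    }

    return fd;
}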
+ replicas[index].change_replica_timeout.setRelative(timeouts.hedged_connection_timeout); + epoll.add(replicas[index].change_replica_timeout.getDescriptor()); + timeout_fd_to_replica_index[replicas[index].change_replica_timeout.getDescriptor()] = index; +} + +void HedgedConnectionsFactory::removeReplicaFromEpoll(int index, int fd) +{ + --replicas_in_process_count; + epoll.remove(fd); + fd_to_replica_index.erase(fd); + + replicas[index].change_replica_timeout.reset(); + epoll.remove(replicas[index].change_replica_timeout.getDescriptor()); + timeout_fd_to_replica_index.erase(replicas[index].change_replica_timeout.getDescriptor()); +} + +int HedgedConnectionsFactory::numberOfProcessingReplicas() const +{ + if (epoll.empty()) + return 0; + + return requested_connections_count - ready_replicas_count; +} + +HedgedConnectionsFactory::State HedgedConnectionsFactory::setBestUsableReplica(Connection *& connection_out) +{ + std::vector indexes; + for (size_t i = 0; i != replicas.size(); ++i) + { + /// Don't add unusable, failed replicas and replicas that are ready or in process. + TryResult result = replicas[i].connection_establisher.getResult(); + if (!result.entry.isNull() + && result.is_usable + && !replicas[i].is_ready + && (!skip_replicas_with_two_level_aggregation_incompatibility || !isTwoLevelAggregationIncompatible(&*result.entry))) + indexes.push_back(i); + } + + if (indexes.empty()) + return State::CANNOT_CHOOSE; + + /// Sort replicas by staleness. + std::stable_sort( + indexes.begin(), + indexes.end(), + [&](size_t lhs, size_t rhs) + { + return replicas[lhs].connection_establisher.getResult().staleness < replicas[rhs].connection_establisher.getResult().staleness; + }); + + replicas[indexes[0]].is_ready = true; + TryResult result = replicas[indexes[0]].connection_establisher.getResult(); + connection_out = &*result.entry; + return State::READY; +} + +bool HedgedConnectionsFactory::isTwoLevelAggregationIncompatible(Connection * connection) +{ + return connection->getServerRevision(timeouts) < DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD; +} + +} +#endif diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h new file mode 100644 index 00000000000..c5e8d493efa --- /dev/null +++ b/src/Client/HedgedConnectionsFactory.h @@ -0,0 +1,158 @@ +#pragma once + +#if defined(OS_LINUX) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/** Class for establishing hedged connections with replicas. + * The process of establishing connection is divided on stages, on each stage if + * replica doesn't respond for a long time, we start establishing connection with + * the next replica, without cancelling working with previous one. + * It works with multiple replicas simultaneously without blocking by using epoll. 
+ */ +class HedgedConnectionsFactory +{ +public: + using ShuffledPool = ConnectionPoolWithFailover::Base::ShuffledPool; + using TryResult = PoolWithFailoverBase::TryResult; + + enum class State + { + READY, + NOT_READY, + CANNOT_CHOOSE, + }; + + struct ReplicaStatus + { + explicit ReplicaStatus(ConnectionEstablisherAsync connection_stablisher_) : connection_establisher(std::move(connection_stablisher_)) + { + } + + ConnectionEstablisherAsync connection_establisher; + TimerDescriptor change_replica_timeout; + bool is_ready = false; + }; + + HedgedConnectionsFactory(const ConnectionPoolWithFailoverPtr & pool_, + const Settings * settings_, + const ConnectionTimeouts & timeouts_, + std::shared_ptr table_to_check_ = nullptr); + + /// Create and return active connections according to pool_mode. + std::vector getManyConnections(PoolMode pool_mode); + + /// Try to get connection to the new replica without blocking. Process all current events in epoll (connections, timeouts), + /// Returned state might be READY (connection established successfully), + /// NOT_READY (there are no ready events now) and CANNOT_CHOOSE (cannot produce new connection anymore). + /// If state is READY, replica connection will be written in connection_out. + State waitForReadyConnections(Connection *& connection_out); + + State startNewConnection(Connection *& connection_out); + + /// Stop working with all replicas that are not READY. + void stopChoosingReplicas(); + + bool hasEventsInProcess() const { return !epoll.empty(); } + + int getFileDescriptor() const { return epoll.getFileDescriptor(); } + + const ConnectionTimeouts & getConnectionTimeouts() const { return timeouts; } + + int numberOfProcessingReplicas() const; + + /// Tell Factory to not return connections with two level aggregation incompatibility. + void skipReplicasWithTwoLevelAggregationIncompatibility() { skip_replicas_with_two_level_aggregation_incompatibility = true; } + + ~HedgedConnectionsFactory(); + +private: + State waitForReadyConnectionsImpl(bool blocking, Connection *& connection_out); + + /// Try to start establishing connection to the new replica. Return + /// the index of the new replica or -1 if cannot start new connection. + State startNewConnectionImpl(Connection *& connection_out); + + /// Find an index of the next free replica to start connection. + /// Return -1 if there is no free replica. + int getNextIndex(); + + int getReadyFileDescriptor(bool blocking); + + void processFailedConnection(int index, const std::string & fail_message); + + State resumeConnectionEstablisher(int index, Connection *& connection_out); + + State processFinishedConnection(int index, TryResult result, Connection *& connection_out); + + void removeReplicaFromEpoll(int index, int fd); + + void addNewReplicaToEpoll(int index, int fd); + + /// Return NOT_READY state if there is no ready events, READY if replica is ready + /// and CANNOT_CHOOSE if there is no more events in epoll. + State processEpollEvents(bool blocking, Connection *& connection_out); + + State setBestUsableReplica(Connection *& connection_out); + + bool isTwoLevelAggregationIncompatible(Connection * connection); + + const ConnectionPoolWithFailoverPtr pool; + const Settings * settings; + const ConnectionTimeouts timeouts; + + std::vector shuffled_pools; + std::vector replicas; + + /// Map socket file descriptor to replica index. + std::unordered_map fd_to_replica_index; + + /// Map timeout for changing replica to replica index. 
+ std::unordered_map timeout_fd_to_replica_index; + + /// If this flag is true, don't return connections with + /// two level aggregation incompatibility + bool skip_replicas_with_two_level_aggregation_incompatibility = false; + + std::shared_ptr table_to_check; + int last_used_index = -1; + bool fallback_to_stale_replicas; + Epoll epoll; + Poco::Logger * log; + std::string fail_messages; + + /// The maximum number of attempts to connect to replicas. + size_t max_tries; + /// Total number of established connections. + size_t entries_count = 0; + /// The number of established connections that are usable. + size_t usable_count = 0; + /// The number of established connections that are up to date. + size_t up_to_date_count = 0; + /// The number of failed connections (replica is considered failed after max_tries attempts to connect). + size_t failed_pools_count= 0; + + /// The number of replicas that are in process of connection. + size_t replicas_in_process_count = 0; + /// The number of ready replicas (replica is considered ready when it's + /// connection returns outside). + size_t ready_replicas_count = 0; + + /// The number of requested in startNewConnection replicas (it's needed for + /// checking the number of requested replicas that are still in process). + size_t requested_connections_count = 0; +}; + +} +#endif diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h new file mode 100644 index 00000000000..38730922456 --- /dev/null +++ b/src/Client/IConnections.h @@ -0,0 +1,60 @@ +#pragma once + +#include + +namespace DB +{ + +/// Base class for working with multiple replicas (connections) +/// from one shard within a single thread +class IConnections : boost::noncopyable +{ +public: + /// Send all scalars to replicas. + virtual void sendScalarsData(Scalars & data) = 0; + /// Send all content of external tables to replicas. + virtual void sendExternalTablesData(std::vector & data) = 0; + + /// Send request to replicas. + virtual void sendQuery( + const ConnectionTimeouts & timeouts, + const String & query, + const String & query_id, + UInt64 stage, + const ClientInfo & client_info, + bool with_pending_data) = 0; + + /// Get packet from any replica. + virtual Packet receivePacket() = 0; + + /// Version of `receivePacket` function without locking. + virtual Packet receivePacketUnlocked(AsyncCallback async_callback) = 0; + + /// Break all active connections. + virtual void disconnect() = 0; + + /// Send a request to replicas to cancel the request + virtual void sendCancel() = 0; + + /// Send parts' uuids to replicas to exclude them from query processing + virtual void sendIgnoredPartUUIDs(const std::vector & uuids) = 0; + + /** On each replica, read and skip all packets to EndOfStream or Exception. + * Returns EndOfStream if no exception has been received. Otherwise + * returns the last received packet of type Exception. + */ + virtual Packet drain() = 0; + + /// Get the replica addresses as a string. + virtual std::string dumpAddresses() const = 0; + + /// Returns the number of replicas. + virtual size_t size() const = 0; + + /// Check if there are any valid replicas. 
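Together with hasActiveConnections() declared just below, a caller might drive this interface roughly as follows (a hypothetical sketch built only on the declarations in this patch, not code from it):

#include <Client/IConnections.h>   /// the interface added by this patch

/// Read packets from the replicas until the stream ends or an exception packet arrives.
void readUntilEndOfStream(DB::IConnections & connections)
{
    while (connections.hasActiveConnections())
    {
        DB::Packet packet = connections.receivePacket();
        if (packet.type == DB::Protocol::Server::EndOfStream
            || packet.type == DB::Protocol::Server::Exception)
            break;
        /// handle Data / Progress / Totals / Extremes packets here
    }
}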
+ virtual bool hasActiveConnections() const = 0; + + virtual ~IConnections() = default; +}; + +} diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index c50dd7b6454..8b2b7c49f26 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -158,7 +158,7 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector & uuid Packet MultiplexedConnections::receivePacket() { std::lock_guard lock(cancel_mutex); - Packet packet = receivePacketUnlocked(); + Packet packet = receivePacketUnlocked({}); return packet; } @@ -206,7 +206,7 @@ Packet MultiplexedConnections::drain() while (hasActiveConnections()) { - Packet packet = receivePacketUnlocked(); + Packet packet = receivePacketUnlocked({}); switch (packet.type) { @@ -253,7 +253,7 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const return buf.str(); } -Packet MultiplexedConnections::receivePacketUnlocked(std::function async_callback) +Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callback) { if (!sent_query) throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR); @@ -265,7 +265,11 @@ Packet MultiplexedConnections::receivePacketUnlocked(std::functionreceivePacket(std::move(async_callback)); + Packet packet; + { + AsyncCallbackSetter async_setter(current_connection, std::move(async_callback)); + packet = current_connection->receivePacket(); + } switch (packet.type) { diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index da0326fa6c0..c04b06e525e 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -16,7 +17,7 @@ namespace DB * * The interface is almost the same as Connection. */ -class MultiplexedConnections final : private boost::noncopyable +class MultiplexedConnections final : public IConnections { public: /// Accepts ready connection. @@ -27,52 +28,38 @@ public: std::vector && connections, const Settings & settings_, const ThrottlerPtr & throttler_); - /// Send all scalars to replicas. - void sendScalarsData(Scalars & data); - /// Send all content of external tables to replicas. - void sendExternalTablesData(std::vector & data); + void sendScalarsData(Scalars & data) override; + void sendExternalTablesData(std::vector & data) override; - /// Send request to replicas. void sendQuery( const ConnectionTimeouts & timeouts, const String & query, const String & query_id, UInt64 stage, const ClientInfo & client_info, - bool with_pending_data); + bool with_pending_data) override; - /// Get packet from any replica. - Packet receivePacket(); + Packet receivePacket() override; - /// Break all active connections. - void disconnect(); + void disconnect() override; - /// Send a request to the replica to cancel the request - void sendCancel(); + void sendCancel() override; /// Send parts' uuids to replicas to exclude them from query processing - void sendIgnoredPartUUIDs(const std::vector & uuids); + void sendIgnoredPartUUIDs(const std::vector & uuids) override; - /** On each replica, read and skip all packets to EndOfStream or Exception. - * Returns EndOfStream if no exception has been received. Otherwise - * returns the last received packet of type Exception. - */ - Packet drain(); + Packet drain() override; - /// Get the replica addresses as a string. 
- std::string dumpAddresses() const; + std::string dumpAddresses() const override; - /// Returns the number of replicas. /// Without locking, because sendCancel() does not change this number. - size_t size() const { return replica_states.size(); } + size_t size() const override { return replica_states.size(); } - /// Check if there are any valid replicas. /// Without locking, because sendCancel() does not change the state of the replicas. - bool hasActiveConnections() const { return active_connection_count > 0; } + bool hasActiveConnections() const override { return active_connection_count > 0; } private: - /// Internal version of `receivePacket` function without locking. - Packet receivePacketUnlocked(std::function async_callback = {}); + Packet receivePacketUnlocked(AsyncCallback async_callback) override; /// Internal version of `dumpAddresses` function without locking. std::string dumpAddressesUnlocked() const; diff --git a/src/Client/PacketReceiver.h b/src/Client/PacketReceiver.h new file mode 100644 index 00000000000..2252e63a2f6 --- /dev/null +++ b/src/Client/PacketReceiver.h @@ -0,0 +1,161 @@ +#pragma once + +#if defined(OS_LINUX) + +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Class for nonblocking packet receiving. It runs connection->receivePacket +/// in fiber and sets special read callback which is called when +/// reading from socket blocks. When read callback is called, +/// socket and receive timeout are added in epoll and execution returns to the main program. +/// So, you can poll this epoll file descriptor to determine when to resume +/// packet receiving. +class PacketReceiver +{ +public: + explicit PacketReceiver(Connection * connection_) : connection(connection_) + { + epoll.add(receive_timeout.getDescriptor()); + epoll.add(connection->getSocket()->impl()->sockfd()); + + fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this}); + } + + /// Resume packet receiving. + std::variant resume() + { + /// If there is no pending data, check receive timeout. + if (!connection->hasReadPendingData() && !checkReceiveTimeout()) + { + /// Receive timeout expired. + return Poco::Timespan(); + } + + /// Resume fiber. + fiber = std::move(fiber).resume(); + if (exception) + std::rethrow_exception(std::move(exception)); + + if (is_read_in_process) + return epoll.getFileDescriptor(); + + /// Receiving packet was finished. + return std::move(packet); + } + + void cancel() + { + Fiber to_destroy = std::move(fiber); + connection = nullptr; + } + + int getFileDescriptor() const { return epoll.getFileDescriptor(); } + +private: + /// When epoll file descriptor is ready, check if it's an expired timeout. + /// Return false if receive timeout expired and socket is not ready, return true otherwise. 
+ bool checkReceiveTimeout() + { + bool is_socket_ready = false; + bool is_receive_timeout_expired = false; + + epoll_event events[2]; + events[0].data.fd = events[1].data.fd = -1; + size_t ready_count = epoll.getManyReady(2, events, true); + + for (size_t i = 0; i != ready_count; ++i) + { + if (events[i].data.fd == connection->getSocket()->impl()->sockfd()) + is_socket_ready = true; + if (events[i].data.fd == receive_timeout.getDescriptor()) + is_receive_timeout_expired = true; + } + + if (is_receive_timeout_expired && !is_socket_ready) + { + receive_timeout.reset(); + return false; + } + + return true; + } + + struct Routine + { + PacketReceiver & receiver; + + struct ReadCallback + { + PacketReceiver & receiver; + Fiber & sink; + + void operator()(int, const Poco::Timespan & timeout, const std::string &) + { + receiver.receive_timeout.setRelative(timeout); + receiver.is_read_in_process = true; + sink = std::move(sink).resume(); + receiver.is_read_in_process = false; + receiver.receive_timeout.reset(); + } + }; + + Fiber operator()(Fiber && sink) + { + try + { + while (true) + { + { + AsyncCallbackSetter async_setter(receiver.connection, ReadCallback{receiver, sink}); + receiver.packet = receiver.connection->receivePacket(); + } + sink = std::move(sink).resume(); + } + + } + catch (const boost::context::detail::forced_unwind &) + { + /// This exception is thrown by the fiber implementation when the fiber is being deleted but hasn't exited yet. + /// It should not be caught or it will segfault. + /// Other exceptions must be caught. + throw; + } + catch (...) + { + receiver.exception = std::current_exception(); + } + + return std::move(sink); + } + }; + + Connection * connection; + Packet packet; + + Fiber fiber; + FiberStack fiber_stack; + + /// We use a timer descriptor for checking the socket receive timeout. + TimerDescriptor receive_timeout; + + /// In the read callback we add the socket file descriptor and the receive timeout timer descriptor + /// to epoll, so we can return the epoll file descriptor outside for polling. + Epoll epoll; + + /// If an exception occurred during fiber resume, we save it and rethrow it. 
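For readers unfamiliar with boost::context, here is a minimal standalone sketch of the suspend/resume mechanics that Routine and ReadCallback above rely on (illustration only, not code from this patch; it must be linked against boost_context):

#include <boost/context/fiber.hpp>
#include <iostream>

namespace ctx = boost::context;

int main()
{
    /// The fiber runs until the point where it would block, then yields to the caller;
    /// the caller resumes it later (in PacketReceiver, after epoll reports readiness).
    ctx::fiber worker([](ctx::fiber && caller) -> ctx::fiber
    {
        std::cout << "fiber: would block here, yielding to the caller\n";
        caller = std::move(caller).resume();   /// what ReadCallback does when a read would block
        std::cout << "fiber: resumed, finishing the packet\n";
        return std::move(caller);
    });

    worker = std::move(worker).resume();       /// run the fiber until it yields
    std::cout << "caller: fiber suspended, this is where epoll would be polled\n";
    worker = std::move(worker).resume();       /// resume once the descriptor is ready
}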
+ std::exception_ptr exception; + + bool is_read_in_process = false; +}; + +} +#endif diff --git a/src/Client/ya.make b/src/Client/ya.make index 87a0cea102a..af1dd05f1d4 100644 --- a/src/Client/ya.make +++ b/src/Client/ya.make @@ -11,7 +11,10 @@ PEERDIR( SRCS( Connection.cpp + ConnectionEstablisher.cpp ConnectionPoolWithFailover.cpp + HedgedConnections.cpp + HedgedConnectionsFactory.cpp MultiplexedConnections.cpp TimeoutSetter.cpp diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index cd45cf583a0..f023177d7f2 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -198,6 +198,11 @@ public: throw Exception("Method compareColumn is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); } + bool hasEqualValues() const override + { + throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index e8a48672435..d8821a646ae 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -370,6 +370,10 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } +bool ColumnArray::hasEqualValues() const +{ + return hasEqualValuesImpl(); +} namespace { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 1caaf672d49..7d01d04735b 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -78,6 +78,7 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; + bool hasEqualValues() const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index f6b6bf22177..a31147b0702 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -96,6 +96,10 @@ public: { throwMustBeDecompressed(); } + bool hasEqualValues() const override + { + throwMustBeDecompressed(); + } void getPermutation(bool, size_t, int, Permutation &) const override { throwMustBeDecompressed(); } void updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const override { throwMustBeDecompressed(); } ColumnPtr replicate(const Offsets &) const override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 99c997ab269..9441f339085 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -206,6 +206,8 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + bool hasEqualValues() const override { return true; } + MutableColumns scatter(ColumnIndex num_columns, const Selector & 
selector) const override; void gather(ColumnGathererStream &) override diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index bad3a4c3402..4a47919adf1 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -58,6 +58,12 @@ void ColumnDecimal::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } +template +bool ColumnDecimal::hasEqualValues() const +{ + return this->template hasEqualValuesImpl>(); +} + template StringRef ColumnDecimal::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 5016ddca791..33eb2946122 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -136,6 +136,7 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + bool hasEqualValues() const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_range) const override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index c4a7f923867..84bd0561f01 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -474,19 +474,4 @@ ColumnPtr ColumnFixedString::compress() const }); } - -void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size) -{ - size_t length = data.size() - old_size; - if (length < n) - { - data.resize_fill(old_size + n); - } - else if (length > n) - { - data.resize_assume_reserved(old_size); - throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE); - } -} - } diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index d9f6619b2d1..58f6d8142fb 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -132,6 +132,11 @@ public: compare_results, direction, nan_direction_hint); } + bool hasEqualValues() const override + { + return hasEqualValuesImpl(); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; @@ -184,8 +189,6 @@ public: const Chars & getChars() const { return chars; } size_t getN() const { return n; } - - static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size); }; } diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index f97f41a8627..6080a94d1fb 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -128,6 +128,11 @@ public: throw Exception("compareColumn is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + bool hasEqualValues() const override + { + throw Exception("hasEqualValues is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void getPermutation(bool, size_t, int, Permutation &) const override { throw Exception("getPermutation is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 8af3b240cb9..e420fd78a39 100644 --- 
a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -311,6 +311,13 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num compare_results, direction, nan_direction_hint); } +bool ColumnLowCardinality::hasEqualValues() const +{ + if (getDictionary().size() <= 1) + return true; + return getIndexes().hasEqualValues(); +} + void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const { if (limit == 0) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index a497be8847d..54ddb8ce68b 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -126,6 +126,8 @@ public: int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; + bool hasEqualValues() const override; + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index cc2640a9cf6..883a70db435 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -187,6 +187,11 @@ void ColumnMap::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } +bool ColumnMap::hasEqualValues() const +{ + return hasEqualValuesImpl(); +} + void ColumnMap::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { nested->getPermutation(reverse, limit, nan_direction_hint, res); @@ -229,7 +234,21 @@ void ColumnMap::protect() void ColumnMap::getExtremes(Field & min, Field & max) const { - nested->getExtremes(min, max); + Field nested_min; + Field nested_max; + + nested->getExtremes(nested_min, nested_max); + + /// Convert result Array fields to Map fields because client expect min and max field to have type Map + + Array nested_min_value = nested_min.get(); + Array nested_max_value = nested_max.get(); + + Map map_min_value(nested_min_value.begin(), nested_min_value.end()); + Map map_max_value(nested_max_value.begin(), nested_max_value.end()); + + min = std::move(map_min_value); + max = std::move(map_max_value); } void ColumnMap::forEachSubcolumn(ColumnCallback callback) diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index acae1574f4c..3987d36b19d 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -72,6 +72,7 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 4e5cc2b4cf7..df5b8789bfc 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -271,6 +271,11 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } +bool ColumnNullable::hasEqualValues() 
const +{ + return hasEqualValuesImpl(); +} + void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const { /// Cannot pass limit because of unknown amount of NULLs. diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 8d267de8644..0d68a6a0a3f 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -94,6 +94,7 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override; + bool hasEqualValues() const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 8fd22e85e10..31f2b2f9275 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -287,6 +287,11 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } +bool ColumnString::hasEqualValues() const +{ + return hasEqualValuesImpl(); +} + template struct ColumnString::Cmp { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 843e445d1a0..cf053d59b4d 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -240,6 +240,8 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + bool hasEqualValues() const override; + /// Variant of compareAt for string comparison with respect of collation. 
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index c7c5f7b97c6..7128b428b1a 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -312,6 +312,11 @@ int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); } +bool ColumnTuple::hasEqualValues() const +{ + return hasEqualValuesImpl(); +} + template struct ColumnTuple::Less { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 818b29937bd..858eff7a75a 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -76,6 +76,7 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; + bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f0aa4a3bab5..5af5ef20310 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -205,6 +205,11 @@ public: compare_results, direction, nan_direction_hint); } + bool hasEqualValues() const override + { + return this->template hasEqualValuesImpl(); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_range) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 2b4b633f9a5..9ed064ede14 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -266,6 +266,9 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const = 0; + /// Check if all elements in the column have equal values. Return true if column is empty. + virtual bool hasEqualValues() const = 0; + /** Returns a permutation that sorts elements of this column, * i.e. perm[i]-th element of source column should be i-th element of sorted column. * reverse - reverse ordering (acsending). 
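The default implementation added in IColumnImpl.h below simply compares every row against row 0 via compareAt. As an illustrative aside (not part of the patch; the helper name is hypothetical), a caller could use the new predicate to skip permutation work on columns whose values are all equal:

    /// Hypothetical caller, for illustration only.
    void buildPermutation(const DB::IColumn & column, DB::IColumn::Permutation & perm)
    {
        const size_t rows = column.size();
        if (column.hasEqualValues())
        {
            /// All rows compare equal, so the identity permutation is already sorted.
            perm.resize(rows);
            for (size_t i = 0; i < rows; ++i)
                perm[i] = i;
            return;
        }
        column.getPermutation(/* reverse = */ false, /* limit = */ 0, /* nan_direction_hint = */ 1, perm);
    }
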
@@ -467,6 +470,9 @@ protected: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const; + + template + bool hasEqualValuesImpl() const; }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 10ef692dc6a..bb08e86bb30 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -40,6 +40,8 @@ public: { } + bool hasEqualValues() const override { return true; } + Field operator[](size_t) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void get(size_t, Field &) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void insert(const Field &) override { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index d2286981ac7..a1ee6a17982 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -127,4 +127,16 @@ void IColumn::doCompareColumn(const Derived & rhs, size_t rhs_row_num, } } +template +bool IColumn::hasEqualValuesImpl() const +{ + size_t num_rows = size(); + for (size_t i = 1; i < num_rows; ++i) + { + if (compareAt(i, 0, static_cast(*this), false) != 0) + return false; + } + return true; +} + } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 99e134675f6..5558f493b92 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -172,6 +172,11 @@ public: { throw Exception("Method compareColumn is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); } + + bool hasEqualValues() const override + { + throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); + } }; using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr; diff --git a/src/Common/Epoll.cpp b/src/Common/Epoll.cpp new file mode 100644 index 00000000000..a17cce8545c --- /dev/null +++ b/src/Common/Epoll.cpp @@ -0,0 +1,86 @@ +#if defined(OS_LINUX) + +#include "Epoll.h" +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int EPOLL_ERROR; + extern const int LOGICAL_ERROR; +} + +Epoll::Epoll() : events_count(0) +{ + epoll_fd = epoll_create1(0); + if (epoll_fd == -1) + throwFromErrno("Cannot open epoll descriptor", DB::ErrorCodes::EPOLL_ERROR); +} + +Epoll::Epoll(Epoll && other) : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) +{ + other.epoll_fd = -1; +} + +Epoll & Epoll::operator=(Epoll && other) +{ + epoll_fd = other.epoll_fd; + other.epoll_fd = -1; + events_count.store(other.events_count.load()); + return *this; +} + +void Epoll::add(int fd, void * ptr) +{ + epoll_event event; + event.events = EPOLLIN | EPOLLPRI; + if (ptr) + event.data.ptr = ptr; + else + event.data.fd = fd; + + ++events_count; + + if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) + throwFromErrno("Cannot add new descriptor to epoll", DB::ErrorCodes::EPOLL_ERROR); +} + +void Epoll::remove(int fd) +{ + --events_count; + + if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, nullptr) == -1) + throwFromErrno("Cannot remove descriptor from epoll", DB::ErrorCodes::EPOLL_ERROR); +} + +size_t Epoll::getManyReady(int max_events, epoll_event * events_out, bool blocking) const +{ + if (events_count == 0) + throw Exception("There is no events in epoll", ErrorCodes::LOGICAL_ERROR); + + int ready_size; + int timeout = blocking ? 
-1 : 0; + do + { + ready_size = epoll_wait(epoll_fd, events_out, max_events, timeout); + + if (ready_size == -1 && errno != EINTR) + throwFromErrno("Error in epoll_wait", DB::ErrorCodes::EPOLL_ERROR); + } + while (ready_size <= 0 && (ready_size != 0 || blocking)); + + return ready_size; +} + +Epoll::~Epoll() +{ + if (epoll_fd != -1) + close(epoll_fd); +} + +} +#endif diff --git a/src/Common/Epoll.h b/src/Common/Epoll.h new file mode 100644 index 00000000000..5d9aef9ef66 --- /dev/null +++ b/src/Common/Epoll.h @@ -0,0 +1,54 @@ +#pragma once +#if defined(OS_LINUX) + +#include +#include +#include +#include + +namespace DB +{ + +using AsyncCallback = std::function; + +class Epoll +{ +public: + Epoll(); + + Epoll(const Epoll &) = delete; + Epoll & operator=(const Epoll &) = delete; + + Epoll & operator=(Epoll && other); + Epoll(Epoll && other); + + /// Add new file descriptor to epoll. If ptr set to nullptr, epoll_event.data.fd = fd, + /// otherwise epoll_event.data.ptr = ptr. + void add(int fd, void * ptr = nullptr); + + /// Remove file descriptor to epoll. + void remove(int fd); + + /// Get events from epoll. Events are written in events_out, this function returns an amount of ready events. + /// If blocking is false and there are no ready events, + /// return empty vector, otherwise wait for ready events. + size_t getManyReady(int max_events, epoll_event * events_out, bool blocking) const; + + int getFileDescriptor() const { return epoll_fd; } + + int size() const { return events_count; } + + bool empty() const { return events_count == 0; } + + const std::string & getDescription() const { return fd_description; } + + ~Epoll(); + +private: + int epoll_fd; + std::atomic events_count; + const std::string fd_description = "epoll"; +}; + +} +#endif diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ba8741efae7..f81e377da2b 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -538,12 +538,14 @@ M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \ M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \ M(571, DATABASE_REPLICATION_FAILED) \ + M(572, TOO_MANY_QUERY_PLAN_OPTIMIZATIONS) \ + M(573, EPOLL_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ M(1001, STD_EXCEPTION) \ M(1002, UNKNOWN_EXCEPTION) \ - M(1003, INVALID_SHARD_ID) + M(1003, INVALID_SHARD_ID) \ /* See END */ diff --git a/src/Common/MemorySanitizer.h b/src/Common/MemorySanitizer.h index 54a92ea3a19..9e34e454090 100644 --- a/src/Common/MemorySanitizer.h +++ b/src/Common/MemorySanitizer.h @@ -1,5 +1,7 @@ #pragma once +#include + #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-id-macro" @@ -9,14 +11,15 @@ #define __msan_test_shadow(X, Y) (false) #define __msan_print_shadow(X, Y) #define __msan_unpoison_string(X) -#if defined(__has_feature) -# if __has_feature(memory_sanitizer) -# undef __msan_unpoison -# undef __msan_test_shadow -# undef __msan_print_shadow -# undef __msan_unpoison_string -# include -# endif + +#if defined(ch_has_feature) +# if ch_has_feature(memory_sanitizer) +# undef __msan_unpoison +# undef __msan_test_shadow +# undef __msan_print_shadow +# undef __msan_unpoison_string +# include +# endif #endif #ifdef __clang__ diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 7779d18d969..6bb6f4a94dd 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -93,6 +93,18 @@ public: double staleness = 0.0; /// Helps choosing the "least stale" option when all 
replicas are stale. }; + struct PoolState; + + using PoolStates = std::vector; + + struct ShuffledPool + { + NestedPool * pool{}; + const PoolState * state{}; + size_t index = 0; + size_t error_count = 0; + }; + /// This functor must be provided by a client. It must perform a single try that takes a connection /// from the provided pool and checks that it is good. using TryGetEntryFunc = std::function; @@ -113,9 +125,6 @@ public: const GetPriorityFunc & get_priority = GetPriorityFunc()); protected: - struct PoolState; - - using PoolStates = std::vector; /// Returns a single connection. Entry get(size_t max_ignored_errors, bool fallback_to_stale_replicas, @@ -124,6 +133,10 @@ protected: /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states. PoolStates updatePoolStates(size_t max_ignored_errors); + std::vector getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority); + + inline void updateSharedErrorCounts(std::vector & shuffled_pools); + auto getPoolExtendedStates() const { std::lock_guard lock(pool_states_mutex); @@ -143,6 +156,46 @@ protected: Poco::Logger * log; }; + +template +std::vector::ShuffledPool> +PoolWithFailoverBase::getShuffledPools( + size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority) +{ + /// Update random numbers and error counts. + PoolStates pool_states = updatePoolStates(max_ignored_errors); + if (get_priority) + { + for (size_t i = 0; i < pool_states.size(); ++i) + pool_states[i].priority = get_priority(i); + } + + /// Sort the pools into order in which they will be tried (based on respective PoolStates). + std::vector shuffled_pools; + shuffled_pools.reserve(nested_pools.size()); + for (size_t i = 0; i < nested_pools.size(); ++i) + shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, 0}); + std::sort( + shuffled_pools.begin(), shuffled_pools.end(), + [](const ShuffledPool & lhs, const ShuffledPool & rhs) + { + return PoolState::compare(*lhs.state, *rhs.state); + }); + + return shuffled_pools; +} + +template +inline void PoolWithFailoverBase::updateSharedErrorCounts(std::vector & shuffled_pools) +{ + std::lock_guard lock(pool_states_mutex); + for (const ShuffledPool & pool: shuffled_pools) + { + auto & pool_state = shared_pool_states[pool.index]; + pool_state.error_count = std::min(max_error_cap, pool_state.error_count + pool.error_count); + } +} + template typename TNestedPool::Entry PoolWithFailoverBase::get(size_t max_ignored_errors, bool fallback_to_stale_replicas, @@ -168,33 +221,7 @@ PoolWithFailoverBase::getMany( const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority) { - /// Update random numbers and error counts. - PoolStates pool_states = updatePoolStates(max_ignored_errors); - if (get_priority) - { - for (size_t i = 0; i < pool_states.size(); ++i) - pool_states[i].priority = get_priority(i); - } - - struct ShuffledPool - { - NestedPool * pool{}; - const PoolState * state{}; - size_t index = 0; - size_t error_count = 0; - }; - - /// Sort the pools into order in which they will be tried (based on respective PoolStates). 
- std::vector shuffled_pools; - shuffled_pools.reserve(nested_pools.size()); - for (size_t i = 0; i < nested_pools.size(); ++i) - shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, 0}); - std::sort( - shuffled_pools.begin(), shuffled_pools.end(), - [](const ShuffledPool & lhs, const ShuffledPool & rhs) - { - return PoolState::compare(*lhs.state, *rhs.state); - }); + std::vector shuffled_pools = getShuffledPools(max_ignored_errors, get_priority); /// We will try to get a connection from each pool until a connection is produced or max_tries is reached. std::vector try_results(shuffled_pools.size()); @@ -206,12 +233,7 @@ PoolWithFailoverBase::getMany( /// At exit update shared error counts with error counts occurred during this call. SCOPE_EXIT( { - std::lock_guard lock(pool_states_mutex); - for (const ShuffledPool & pool: shuffled_pools) - { - auto & pool_state = shared_pool_states[pool.index]; - pool_state.error_count = std::min(max_error_cap, pool_state.error_count + pool.error_count); - } + updateSharedErrorCounts(shuffled_pools); }); std::string fail_messages; diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 58660f9e4da..35df77b6b8a 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -37,8 +37,12 @@ public: static constexpr size_t capacity = #ifndef NDEBUG - /* The stacks are normally larger in debug version due to less inlining. */ - 64 + /* The stacks are normally larger in debug version due to less inlining. + * + * NOTE: it cannot be larger then 56 right now, since otherwise it will + * not fit into minimal PIPE_BUF (512) in TraceCollector. + */ + 56 #else 32 #endif diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index bd6c5d9eda0..a23184c9c0a 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -60,11 +60,11 @@ Otherwise you will get only exported symbols from program headers. #endif #define __msan_unpoison_string(X) // NOLINT -#if defined(__has_feature) -# if __has_feature(memory_sanitizer) -# undef __msan_unpoison_string -# include -# endif +#if defined(ch_has_feature) +# if ch_has_feature(memory_sanitizer) +# undef __msan_unpoison_string +# include +# endif #endif diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index f4c3ec35588..791e6380a89 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -27,10 +27,16 @@ TimerDescriptor::TimerDescriptor(int clockid, int flags) throwFromErrno("Cannot set O_NONBLOCK for timer_fd", ErrorCodes::CANNOT_FCNTL); } +TimerDescriptor::TimerDescriptor(TimerDescriptor && other) : timer_fd(other.timer_fd) +{ + other.timer_fd = -1; +} + TimerDescriptor::~TimerDescriptor() { /// Do not check for result cause cannot throw exception. 
- close(timer_fd); + if (timer_fd != -1) + close(timer_fd); } void TimerDescriptor::reset() const @@ -74,7 +80,7 @@ void TimerDescriptor::setRelative(const Poco::Timespan & timespan) const spec.it_interval.tv_nsec = 0; spec.it_interval.tv_sec = 0; spec.it_value.tv_sec = timespan.totalSeconds(); - spec.it_value.tv_nsec = timespan.useconds(); + spec.it_value.tv_nsec = timespan.useconds() * 1000; if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr)) throwFromErrno("Cannot set time for timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD); diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h index ddb8f2a1367..42f8eb386af 100644 --- a/src/Common/TimerDescriptor.h +++ b/src/Common/TimerDescriptor.h @@ -12,12 +12,12 @@ private: int timer_fd; public: - explicit TimerDescriptor(int clockid, int flags); + explicit TimerDescriptor(int clockid = CLOCK_MONOTONIC, int flags = 0); ~TimerDescriptor(); TimerDescriptor(const TimerDescriptor &) = delete; TimerDescriptor & operator=(const TimerDescriptor &) = delete; - TimerDescriptor(TimerDescriptor &&) = default; + TimerDescriptor(TimerDescriptor && other); TimerDescriptor & operator=(TimerDescriptor &&) = default; int getDescriptor() const { return timer_fd; } diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index cbac9cd1a19..ab1845ebbd2 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -22,7 +22,9 @@ namespace { /// Normally query_id is a UUID (string with a fixed length) but user can provide custom query_id. /// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler. - constexpr size_t QUERY_ID_MAX_LEN = 1024; + /// + /// And it cannot be large, since otherwise it will not fit into PIPE_BUF. + constexpr size_t QUERY_ID_MAX_LEN = sizeof("00000000-0000-0000-0000-000000000000") - 1; // 36 } LazyPipeFDs pipe; @@ -60,10 +62,14 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac 8 * sizeof(char) + // maximum VarUInt length for string size QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length sizeof(UInt8) + // number of stack frames - sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity + sizeof(StackTrace::FramePointers) + // collected stack trace, maximum capacity sizeof(TraceType) + // trace type sizeof(UInt64) + // thread_id sizeof(Int64); // size + /// Write should be atomic to avoid overlaps + /// (since recursive collect() is possible) + static_assert(buf_size < PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic"); + char buffer[buf_size]; WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer); diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index c53ea60ec7c..9ff37a7045d 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -391,6 +391,9 @@ public: virtual void multi( const Requests & requests, MultiCallback callback) = 0; + + /// Expire session and finish all pending requests + virtual void finalize() = 0; }; } diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index ca9f584304f..b46f98c0074 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -30,7 +30,7 @@ using TestKeeperRequestPtr = std::shared_ptr; * * NOTE: You can add various failure modes for better testing. 
*/ -class TestKeeper : public IKeeper +class TestKeeper final : public IKeeper { public: TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_); @@ -83,6 +83,7 @@ public: const Requests & requests, MultiCallback callback) override; + void finalize() override; struct Node { @@ -130,7 +131,6 @@ private: void pushRequest(RequestInfo && request); - void finalize(); ThreadFromGlobalPool processing_thread; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a1c6eb9b481..330985e1599 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -44,7 +44,7 @@ static void check(Coordination::Error code, const std::string & path) } -void ZooKeeper::init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, +void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) { log = &Poco::Logger::get("ZooKeeper"); @@ -60,13 +60,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho if (hosts.empty()) throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS); - std::vector hosts_strings; - splitInto<','>(hosts_strings, hosts); Coordination::ZooKeeper::Nodes nodes; - nodes.reserve(hosts_strings.size()); + nodes.reserve(hosts.size()); + + Strings shuffled_hosts = hosts; + /// Shuffle the hosts to distribute the load among ZooKeeper nodes. + pcg64 generator(randomSeed()); + std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator); bool dns_error = false; - for (auto & host_string : hosts_strings) + for (auto & host_string : shuffled_hosts) { try { @@ -109,9 +112,9 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho Poco::Timespan(0, operation_timeout_ms_ * 1000)); if (chroot.empty()) - LOG_TRACE(log, "Initialized, hosts: {}", hosts); + LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(hosts, ",")); else - LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", hosts, chroot); + LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(hosts, ","), chroot); } else if (implementation == "testkeeper") { @@ -128,7 +131,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho throw KeeperException("Zookeeper root doesn't exist. 
You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); } -ZooKeeper::ZooKeeper(const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_, +ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, + int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_) +{ + Strings hosts_strings; + splitInto<','>(hosts_strings, hosts_string); + + init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); +} + +ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_) { init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); @@ -141,8 +153,6 @@ struct ZooKeeperArgs Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_name, keys); - std::vector hosts_strings; - session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; implementation = "zookeeper"; @@ -150,7 +160,7 @@ struct ZooKeeperArgs { if (startsWith(key, "node")) { - hosts_strings.push_back( + hosts.push_back( (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181") @@ -180,17 +190,6 @@ struct ZooKeeperArgs throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } - /// Shuffle the hosts to distribute the load among ZooKeeper nodes. - pcg64 generator(randomSeed()); - std::shuffle(hosts_strings.begin(), hosts_strings.end(), generator); - - for (auto & host : hosts_strings) - { - if (!hosts.empty()) - hosts += ','; - hosts += host; - } - if (!chroot.empty()) { if (chroot.front() != '/') @@ -200,7 +199,7 @@ struct ZooKeeperArgs } } - std::string hosts; + Strings hosts; std::string identity; int session_timeout_ms; int operation_timeout_ms; @@ -922,6 +921,10 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re } } +void ZooKeeper::finalize() +{ + impl->finalize(); +} size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) { @@ -1000,4 +1003,5 @@ Coordination::RequestPtr makeCheckRequest(const std::string & path, int version) request->version = version; return request; } + } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 5b37e4d6024..4a65ff070f7 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -50,7 +50,14 @@ class ZooKeeper public: using Ptr = std::shared_ptr; - ZooKeeper(const std::string & hosts_, const std::string & identity_ = "", + /// hosts_string -- comma separated [secure://]host:port list + ZooKeeper(const std::string & hosts_string, const std::string & identity_ = "", + int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, + int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, + const std::string & chroot_ = "", + const std::string & implementation_ = "zookeeper"); + + ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t operation_timeout_ms_ = 
Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", @@ -247,10 +254,12 @@ public: /// Like the previous one but don't throw any exceptions on future.get() FutureMulti tryAsyncMulti(const Coordination::Requests & ops); + void finalize(); + private: friend class EphemeralNodeHolder; - void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, + void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); /// The following methods don't throw exceptions but return error codes. @@ -266,7 +275,7 @@ private: std::unique_ptr impl; - std::string hosts; + Strings hosts; std::string identity; int32_t session_timeout_ms; int32_t operation_timeout_ms; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 71b7cd56149..afd2e89538f 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -88,7 +88,7 @@ using namespace DB; /** Usage scenario: look at the documentation for IKeeper class. */ -class ZooKeeper : public IKeeper +class ZooKeeper final : public IKeeper { public: struct Node @@ -167,6 +167,20 @@ public: const Requests & requests, MultiCallback callback) override; + /// Without forcefully invalidating (finalizing) ZooKeeper session before + /// establishing a new one, there was a possibility that server is using + /// two ZooKeeper sessions simultaneously in different parts of code. + /// This is strong antipattern and we always prevented it. + + /// ZooKeeper is linearizeable for writes, but not linearizeable for + /// reads, it only maintains "sequential consistency": in every session + /// you observe all events in order but possibly with some delay. If you + /// perform write in one session, then notify different part of code and + /// it will do read in another session, that read may not see the + /// already performed write. 
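As a usage sketch only (the wrapper function is an assumption and the zkutil namespace is assumed from the surrounding code, not shown in this hunk), calling code that needs a fresh session can finalize the old client first so that two live sessions never coexist:

    /// Illustrative only: expire the previous session before establishing a new one.
    void recreateSession(std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const Strings & hosts)
    {
        if (zookeeper)
            zookeeper->finalize();    /// expire the session and finish all pending requests
        zookeeper = std::make_shared<zkutil::ZooKeeper>(hosts);
    }
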
+ + void finalize() override { finalize(false, false); } + private: String root_path; ACLs default_acls; diff --git a/src/Common/ya.make b/src/Common/ya.make index 64dd628c457..debad6c5de2 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -39,6 +39,7 @@ SRCS( DNSResolver.cpp Dwarf.cpp Elf.cpp + Epoll.cpp ErrorCodes.cpp Exception.cpp ExternalLoaderStatus.cpp diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp new file mode 100644 index 00000000000..63102ceedaa --- /dev/null +++ b/src/Coordination/Changelog.cpp @@ -0,0 +1,557 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CHECKSUM_DOESNT_MATCH; + extern const int CORRUPTED_DATA; + extern const int UNKNOWN_FORMAT_VERSION; + extern const int LOGICAL_ERROR; +} + +namespace +{ + +constexpr auto DEFAULT_PREFIX = "changelog"; + +std::string formatChangelogPath(const std::string & prefix, const ChangelogFileDescription & name) +{ + std::filesystem::path path(prefix); + path /= std::filesystem::path(name.prefix + "_" + std::to_string(name.from_log_index) + "_" + std::to_string(name.to_log_index) + ".bin"); + return path; +} + +ChangelogFileDescription getChangelogFileDescription(const std::string & path_str) +{ + std::filesystem::path path(path_str); + std::string filename = path.stem(); + Strings filename_parts; + boost::split(filename_parts, filename, boost::is_any_of("_")); + if (filename_parts.size() < 3) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path_str); + + ChangelogFileDescription result; + result.prefix = filename_parts[0]; + result.from_log_index = parse(filename_parts[1]); + result.to_log_index = parse(filename_parts[2]); + result.path = path_str; + return result; +} + +LogEntryPtr makeClone(const LogEntryPtr & entry) +{ + return cs_new(entry->get_term(), nuraft::buffer::clone(entry->get_buf()), entry->get_val_type()); +} + +Checksum computeRecordChecksum(const ChangelogRecord & record) +{ + SipHash hash; + hash.update(record.header.version); + hash.update(record.header.index); + hash.update(record.header.term); + hash.update(record.header.value_type); + hash.update(record.header.blob_size); + if (record.header.blob_size != 0) + hash.update(reinterpret_cast(record.blob->data_begin()), record.blob->size()); + return hash.get64(); +} + +} + +class ChangelogWriter +{ +public: + ChangelogWriter(const std::string & filepath_, WriteMode mode, size_t start_index_) + : filepath(filepath_) + , plain_buf(filepath, DBMS_DEFAULT_BUFFER_SIZE, mode == WriteMode::Rewrite ? 
-1 : (O_APPEND | O_CREAT | O_WRONLY)) + , start_index(start_index_) + {} + + + off_t appendRecord(ChangelogRecord && record, bool sync) + { + off_t result = plain_buf.count(); + writeIntBinary(computeRecordChecksum(record), plain_buf); + + writeIntBinary(record.header.version, plain_buf); + writeIntBinary(record.header.index, plain_buf); + writeIntBinary(record.header.term, plain_buf); + writeIntBinary(record.header.value_type, plain_buf); + writeIntBinary(record.header.blob_size, plain_buf); + + if (record.header.blob_size != 0) + plain_buf.write(reinterpret_cast(record.blob->data_begin()), record.blob->size()); + + entries_written++; + + if (sync) + plain_buf.sync(); + return result; + } + + void truncateToLength(off_t new_length) + { + flush(); + plain_buf.truncate(new_length); + plain_buf.seek(new_length, SEEK_SET); + } + + void flush() + { + plain_buf.sync(); + } + + size_t getEntriesWritten() const + { + return entries_written; + } + + void setEntriesWritten(size_t entries_written_) + { + entries_written = entries_written_; + } + + size_t getStartIndex() const + { + return start_index; + } + + void setStartIndex(size_t start_index_) + { + start_index = start_index_; + } + +private: + std::string filepath; + WriteBufferFromFile plain_buf; + size_t entries_written = 0; + size_t start_index; +}; + +struct ChangelogReadResult +{ + size_t entries_read; + off_t last_position; + bool error; +}; + +class ChangelogReader +{ +public: + explicit ChangelogReader(const std::string & filepath_) + : filepath(filepath_) + , read_buf(filepath) + {} + + ChangelogReadResult readChangelog(IndexToLogEntry & logs, size_t start_log_index, IndexToOffset & index_to_offset, Poco::Logger * log) + { + size_t previous_index = 0; + ChangelogReadResult result{}; + try + { + while (!read_buf.eof()) + { + result.last_position = read_buf.count(); + Checksum record_checksum; + readIntBinary(record_checksum, read_buf); + + /// Initialization is required, otherwise checksums may fail + ChangelogRecord record; + readIntBinary(record.header.version, read_buf); + readIntBinary(record.header.index, read_buf); + readIntBinary(record.header.term, read_buf); + readIntBinary(record.header.value_type, read_buf); + readIntBinary(record.header.blob_size, read_buf); + + if (record.header.version > CURRENT_CHANGELOG_VERSION) + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath); + + if (record.header.blob_size != 0) + { + auto buffer = nuraft::buffer::alloc(record.header.blob_size); + auto * buffer_begin = reinterpret_cast(buffer->data_begin()); + read_buf.readStrict(buffer_begin, record.header.blob_size); + record.blob = buffer; + } + else + record.blob = nullptr; + + if (previous_index != 0 && previous_index + 1 != record.header.index) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Previous log entry {}, next log entry {}, seems like some entries skipped", previous_index, record.header.index); + + previous_index = record.header.index; + + Checksum checksum = computeRecordChecksum(record); + if (checksum != record_checksum) + { + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksums doesn't match for log {} (version {}), index {}, blob_size {}", + filepath, record.header.version, record.header.index, record.header.blob_size); + } + + if (logs.count(record.header.index) != 0) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Duplicated index id {} in log {}", record.header.index, filepath); + + result.entries_read += 1; + + if (record.header.index 
< start_log_index) + continue; + + auto log_entry = nuraft::cs_new(record.header.term, record.blob, record.header.value_type); + + logs.emplace(record.header.index, log_entry); + index_to_offset[record.header.index] = result.last_position; + if (result.entries_read % 50000 == 0) + LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.entries_read); + } + } + catch (const Exception & ex) + { + if (ex.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION) + throw ex; + + result.error = true; + LOG_WARNING(log, "Cannot completely read changelog on path {}, error: {}", filepath, ex.message()); + } + catch (...) + { + result.error = true; + tryLogCurrentException(log); + } + LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.entries_read); + + return result; + } + +private: + std::string filepath; + ReadBufferFromFile read_buf; +}; + +Changelog::Changelog(const std::string & changelogs_dir_, size_t rotate_interval_, Poco::Logger * log_) + : changelogs_dir(changelogs_dir_) + , rotate_interval(rotate_interval_) + , log(log_) +{ + namespace fs = std::filesystem; + if (!fs::exists(changelogs_dir)) + fs::create_directories(changelogs_dir); + + for (const auto & p : fs::directory_iterator(changelogs_dir)) + { + auto file_description = getChangelogFileDescription(p.path()); + existing_changelogs[file_description.from_log_index] = file_description; + } +} + +void Changelog::readChangelogAndInitWriter(size_t from_log_index) +{ + start_index = from_log_index == 0 ? 1 : from_log_index; + size_t total_read = 0; + size_t entries_in_last = 0; + size_t incomplete_log_index = 0; + ChangelogReadResult result{}; + + bool started = false; + for (const auto & [changelog_start_index, changelog_description] : existing_changelogs) + { + entries_in_last = changelog_description.to_log_index - changelog_description.from_log_index + 1; + + if (changelog_description.to_log_index >= from_log_index) + { + if (!started) + { + if (changelog_description.from_log_index > start_index) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot read changelog from index {}, smallest available index {}", start_index, changelog_description.from_log_index); + started = true; + } + + ChangelogReader reader(changelog_description.path); + result = reader.readChangelog(logs, from_log_index, index_to_start_pos, log); + total_read += result.entries_read; + + /// May happen after truncate, crash or simply unfinished log + if (result.entries_read < entries_in_last) + { + incomplete_log_index = changelog_start_index; + break; + } + } + } + + if (!started && start_index != 1) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Required to read data from {}, but we don't have any active changelogs", from_log_index); + + if (incomplete_log_index != 0) + { + /// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them. 
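To make this recovery path concrete, a worked example (the interval and entry counts are assumed for illustration):

    /// Suppose rotate_log_storage_interval = 100000 and two files exist on disk:
    ///   changelog_1_100000.bin       -- all 100000 entries read back successfully
    ///   changelog_100001_200000.bin  -- only 40000 entries readable (crash while writing)
    /// readChangelogAndInitWriter(1) then sets incomplete_log_index = 100001, removes any
    /// files that come after it, reopens changelog_100001_200000.bin in Append mode with
    /// entries_written = 40000, and truncates it at last_position if the last record was broken.
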
+ for (auto itr = existing_changelogs.upper_bound(incomplete_log_index); itr != existing_changelogs.end();) + { + LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path); + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } + + /// Continue to write into existing log + if (!existing_changelogs.empty()) + { + auto description = existing_changelogs.rbegin()->second; + LOG_TRACE(log, "Continue to write into {}", description.path); + current_writer = std::make_unique(description.path, WriteMode::Append, description.from_log_index); + current_writer->setEntriesWritten(result.entries_read); + + /// Truncate all broken entries from log + if (result.error) + { + LOG_WARNING(log, "Read finished with error, truncating all broken log entries"); + current_writer->truncateToLength(result.last_position); + } + } + } + + /// Start new log if we don't initialize writer from previous log + if (!current_writer) + rotate(start_index + total_read); +} + +void Changelog::rotate(size_t new_start_log_index) +{ + //// doesn't exist on init + if (current_writer) + current_writer->flush(); + + ChangelogFileDescription new_description; + new_description.prefix = DEFAULT_PREFIX; + new_description.from_log_index = new_start_log_index; + new_description.to_log_index = new_start_log_index + rotate_interval - 1; + + new_description.path = formatChangelogPath(changelogs_dir, new_description); + + LOG_TRACE(log, "Starting new changelog {}", new_description.path); + existing_changelogs[new_start_log_index] = new_description; + current_writer = std::make_unique(new_description.path, WriteMode::Rewrite, new_start_log_index); +} + +ChangelogRecord Changelog::buildRecord(size_t index, const LogEntryPtr & log_entry) +{ + ChangelogRecord record; + record.header.version = ChangelogVersion::V0; + record.header.index = index; + record.header.term = log_entry->get_term(); + record.header.value_type = log_entry->get_val_type(); + auto buffer = log_entry->get_buf_ptr(); + if (buffer) + record.header.blob_size = buffer->size(); + else + record.header.blob_size = 0; + + record.blob = buffer; + + return record; +} + +void Changelog::appendEntry(size_t index, const LogEntryPtr & log_entry, bool force_sync) +{ + if (!current_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); + + if (logs.empty()) + start_index = index; + + if (current_writer->getEntriesWritten() == rotate_interval) + rotate(index); + + auto offset = current_writer->appendRecord(buildRecord(index, log_entry), force_sync); + if (!index_to_start_pos.try_emplace(index, offset).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index); + + logs[index] = makeClone(log_entry); +} + +void Changelog::writeAt(size_t index, const LogEntryPtr & log_entry, bool force_sync) +{ + if (index_to_start_pos.count(index) == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index); + + bool go_to_previous_file = index < current_writer->getStartIndex(); + if (go_to_previous_file) + { + auto index_changelog = existing_changelogs.lower_bound(index); + ChangelogFileDescription description; + if (index_changelog->first == index) + description = index_changelog->second; + else + description = std::prev(index_changelog)->second; + + current_writer = std::make_unique(description.path, WriteMode::Append, index_changelog->first); + 
current_writer->setEntriesWritten(description.to_log_index - description.from_log_index + 1); + } + + auto entries_written = current_writer->getEntriesWritten(); + current_writer->truncateToLength(index_to_start_pos[index]); + + if (go_to_previous_file) + { + /// Remove all subsequent files + auto to_remove_itr = existing_changelogs.upper_bound(index); + for (auto itr = to_remove_itr; itr != existing_changelogs.end();) + { + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } + } + + /// Remove redundant logs from memory + for (size_t i = index; ; ++i) + { + auto log_itr = logs.find(i); + if (log_itr == logs.end()) + break; + logs.erase(log_itr); + index_to_start_pos.erase(i); + entries_written--; + } + + current_writer->setEntriesWritten(entries_written); + + appendEntry(index, log_entry, force_sync); +} + +void Changelog::compact(size_t up_to_log_index) +{ + for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();) + { + /// Remove all completely outdated changelog files + if (itr->second.to_log_index <= up_to_log_index) + { + + LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path); + std::erase_if(index_to_start_pos, [right_index = itr->second.to_log_index] (const auto & item) { return item.first <= right_index; }); + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } + else /// Files are ordered, so all subsequent should exist + break; + } + start_index = up_to_log_index + 1; + std::erase_if(logs, [up_to_log_index] (const auto & item) { return item.first <= up_to_log_index; }); +} + +LogEntryPtr Changelog::getLastEntry() const +{ + static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(size_t))); + + size_t next_index = getNextEntryIndex() - 1; + auto entry = logs.find(next_index); + if (entry == logs.end()) + return fake_entry; + + return entry->second; +} + +LogEntriesPtr Changelog::getLogEntriesBetween(size_t start, size_t end) +{ + LogEntriesPtr ret = nuraft::cs_new>>(); + + ret->resize(end - start); + size_t result_pos = 0; + for (size_t i = start; i < end; ++i) + { + (*ret)[result_pos] = entryAt(i); + result_pos++; + } + return ret; +} + +LogEntryPtr Changelog::entryAt(size_t index) +{ + nuraft::ptr src = nullptr; + auto entry = logs.find(index); + if (entry == logs.end()) + return nullptr; + + src = entry->second; + return src; +} + +nuraft::ptr Changelog::serializeEntriesToBuffer(size_t index, int32_t count) +{ + std::vector> returned_logs; + + size_t size_total = 0; + for (size_t i = index; i < index + count; ++i) + { + auto entry = logs.find(i); + if (entry == logs.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Don't have log entry {}", i); + + nuraft::ptr buf = entry->second->serialize(); + size_total += buf->size(); + returned_logs.push_back(buf); + } + + nuraft::ptr buf_out = nuraft::buffer::alloc(sizeof(int32_t) + count * sizeof(int32_t) + size_total); + buf_out->pos(0); + buf_out->put(static_cast(count)); + + for (auto & entry : returned_logs) + { + nuraft::ptr & bb = entry; + buf_out->put(static_cast(bb->size())); + buf_out->put(*bb); + } + return buf_out; +} + +void Changelog::applyEntriesFromBuffer(size_t index, nuraft::buffer & buffer, bool force_sync) +{ + buffer.pos(0); + int num_logs = buffer.get_int(); + + for (int i = 0; i < num_logs; ++i) + { + size_t cur_index = index + i; + int buf_size = buffer.get_int(); + + nuraft::ptr buf_local = nuraft::buffer::alloc(buf_size); + buffer.get(buf_local); + + LogEntryPtr 
log_entry = nuraft::log_entry::deserialize(*buf_local); + if (i == 0 && logs.count(cur_index)) + writeAt(cur_index, log_entry, force_sync); + else + appendEntry(cur_index, log_entry, force_sync); + } +} + +void Changelog::flush() +{ + current_writer->flush(); +} + +Changelog::~Changelog() +{ + try + { + if (current_writer) + current_writer->flush(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +} diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h new file mode 100644 index 00000000000..be38915066d --- /dev/null +++ b/src/Coordination/Changelog.h @@ -0,0 +1,136 @@ +#pragma once + +#include // Y_IGNORE +#include +#include +#include +#include +#include + +namespace DB +{ + +using Checksum = UInt64; + +using LogEntryPtr = nuraft::ptr; +using LogEntries = std::vector; +using LogEntriesPtr = nuraft::ptr; +using BufferPtr = nuraft::ptr; + +using IndexToOffset = std::unordered_map; +using IndexToLogEntry = std::unordered_map; + +enum class ChangelogVersion : uint8_t +{ + V0 = 0, +}; + +static constexpr auto CURRENT_CHANGELOG_VERSION = ChangelogVersion::V0; + +struct ChangelogRecordHeader +{ + ChangelogVersion version = CURRENT_CHANGELOG_VERSION; + size_t index; /// entry log number + size_t term; + nuraft::log_val_type value_type; + size_t blob_size; +}; + +/// Changelog record on disk +struct ChangelogRecord +{ + ChangelogRecordHeader header; + nuraft::ptr blob; +}; + +/// changelog_fromindex_toindex.bin +/// [fromindex, toindex] <- inclusive +struct ChangelogFileDescription +{ + std::string prefix; + size_t from_log_index; + size_t to_log_index; + + std::string path; +}; + +class ChangelogWriter; + +/// Simplest changelog with files rotation. +/// No compression, no metadata, just entries with headers one by one +/// Able to read broken files/entries and discard them. +class Changelog +{ + +public: + Changelog(const std::string & changelogs_dir_, size_t rotate_interval_, Poco::Logger * log_); + + /// Read changelog from files on changelogs_dir_ skipping all entries before from_log_index + /// Truncate broken entries, remove files after broken entries. + void readChangelogAndInitWriter(size_t from_log_index); + + /// Add entry to log with index. Call fsync if force_sync true. + void appendEntry(size_t index, const LogEntryPtr & log_entry, bool force_sync); + + /// Write entry at index and truncate all subsequent entries. + void writeAt(size_t index, const LogEntryPtr & log_entry, bool force_sync); + + /// Remove log files with to_log_index <= up_to_log_index. 
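A brief worked illustration of the compaction rule (the ranges are assumed):

    /// With files covering [1, 100000] and [100001, 200000], compact(150000) removes only the
    /// first file, because its to_log_index (100000) is <= 150000, while the second file still
    /// holds entries above 150000 and is kept. In-memory entries with index <= 150000 are
    /// erased and start_index becomes 150001.
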
+ void compact(size_t up_to_log_index); + + size_t getNextEntryIndex() const + { + return start_index + logs.size(); + } + + size_t getStartIndex() const + { + return start_index; + } + + /// Last entry in log, or fake entry with term 0 if log is empty + LogEntryPtr getLastEntry() const; + + /// Return log entries between [start, end) + LogEntriesPtr getLogEntriesBetween(size_t start_index, size_t end_index); + + /// Return entry at position index + LogEntryPtr entryAt(size_t index); + + /// Serialize entries from index into buffer + BufferPtr serializeEntriesToBuffer(size_t index, int32_t count); + + /// Apply entries from buffer overriding existing entries + void applyEntriesFromBuffer(size_t index, nuraft::buffer & buffer, bool force_sync); + + /// Fsync log to disk + void flush(); + + size_t size() const + { + return logs.size(); + } + + /// Fsync log to disk + ~Changelog(); + +private: + /// Pack log_entry into changelog record + static ChangelogRecord buildRecord(size_t index, const LogEntryPtr & log_entry); + + /// Starts new file [new_start_log_index, new_start_log_index + rotate_interval] + void rotate(size_t new_start_log_index); + +private: + const std::string changelogs_dir; + const size_t rotate_interval; + Poco::Logger * log; + + std::map existing_changelogs; + std::unique_ptr current_writer; + IndexToOffset index_to_start_pos; + IndexToLogEntry logs; + size_t start_index = 0; +}; + +} diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 441e1a5936f..34a97f82399 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -22,13 +22,15 @@ struct Settings; M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \ M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \ - M(UInt64, reserved_log_items, 5000, "How many log items to store (don't remove during compaction)", 0) \ - M(UInt64, snapshot_distance, 5000, "How many log items we have to collect to write new snapshot", 0) \ + M(UInt64, reserved_log_items, 50000, "How many log items to store (don't remove during compaction)", 0) \ + M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(UInt64, max_stored_snapshots, 3, "How many snapshots we want to store", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \ M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \ - M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) + M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. 
Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ + M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \ + M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp index 101458891e7..877c8a60a2a 100644 --- a/src/Coordination/InMemoryLogStore.cpp +++ b/src/Coordination/InMemoryLogStore.cpp @@ -72,12 +72,12 @@ nuraft::ptr>> InMemoryLogStore::log_e ret->resize(end - start); size_t cc = 0; - for (size_t ii = start; ii < end; ++ii) + for (size_t i = start; i < end; ++i) { nuraft::ptr src = nullptr; { std::lock_guard l(logs_lock); - auto entry = logs.find(ii); + auto entry = logs.find(i); if (entry == logs.end()) { entry = logs.find(0); @@ -152,9 +152,9 @@ void InMemoryLogStore::apply_pack(size_t index, nuraft::buffer & pack) pack.pos(0); Int32 num_logs = pack.get_int(); - for (Int32 ii = 0; ii < num_logs; ++ii) + for (Int32 i = 0; i < num_logs; ++i) { - size_t cur_idx = index + ii; + size_t cur_idx = index + i; Int32 buf_size = pack.get_int(); nuraft::ptr buf_local = nuraft::buffer::alloc(buf_size); diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h index 755b72c06cc..25a1969d2e9 100644 --- a/src/Coordination/LoggerWrapper.h +++ b/src/Coordination/LoggerWrapper.h @@ -9,12 +9,26 @@ namespace DB class LoggerWrapper : public nuraft::logger { +private: + + static inline const std::unordered_map LEVELS = + { + {LogsLevel::trace, Poco::Message::Priority::PRIO_TRACE}, + {LogsLevel::debug, Poco::Message::Priority::PRIO_DEBUG}, + {LogsLevel::information, Poco::Message::PRIO_INFORMATION}, + {LogsLevel::warning, Poco::Message::PRIO_WARNING}, + {LogsLevel::error, Poco::Message::PRIO_ERROR}, + {LogsLevel::fatal, Poco::Message::PRIO_FATAL} + }; + static inline const int LEVEL_MAX = static_cast(LogsLevel::trace); + static inline const int LEVEL_MIN = static_cast(LogsLevel::none); + public: LoggerWrapper(const std::string & name, LogsLevel level_) : log(&Poco::Logger::get(name)) - , level(static_cast(level_)) + , level(level_) { - log->setLevel(level); + log->setLevel(static_cast(LEVELS.at(level))); } void put_details( @@ -24,24 +38,26 @@ public: size_t /* line_number */, const std::string & msg) override { - LOG_IMPL(log, static_cast(level_), static_cast(level_), msg); + LogsLevel db_level = static_cast(level_); + LOG_IMPL(log, db_level, LEVELS.at(db_level), msg); } void set_level(int level_) override { - level_ = std::min(6, std::max(1, level_)); - log->setLevel(level_); - level = level_; + level_ = std::min(LEVEL_MAX, std::max(LEVEL_MIN, level_)); + level = static_cast(level_); + log->setLevel(static_cast(LEVELS.at(level))); } int get_level() override { - return level; + LogsLevel lvl = level; + return static_cast(lvl); } private: Poco::Logger * log; - std::atomic level; + std::atomic level; }; } diff --git a/src/Coordination/NuKeeperLogStore.cpp b/src/Coordination/NuKeeperLogStore.cpp new file mode 100644 index 00000000000..6aba078bb80 --- /dev/null +++ b/src/Coordination/NuKeeperLogStore.cpp @@ -0,0 +1,105 @@ +#include + +namespace DB +{ + +NuKeeperLogStore::NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_) + : log(&Poco::Logger::get("NuKeeperLogStore")) + , changelog(changelogs_path, rotate_interval_, log) + , force_sync(force_sync_) +{ +} + 
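A minimal usage sketch of the log store (the path, interval and entry contents are assumptions, not taken from this patch); every call is serialised by changelog_lock, and with force_sync_ = true each appended record is fsync'ed:

    /// Illustrative only; uses the interfaces declared in NuKeeperLogStore.h from this patch.
    DB::NuKeeperLogStore store("/var/lib/clickhouse/coordination/logs",
                               /* rotate_interval_ = */ 100000,
                               /* force_sync_ = */ true);
    store.init(/* from_log_idx = */ 1);   /// replay existing changelog_*.bin files, then continue appending
    auto entry = nuraft::cs_new<nuraft::log_entry>(/* term = */ 1, nuraft::buffer::alloc(8));
    const size_t assigned_index = store.append(entry);   /// index the changelog assigned to the entry
    store.flush();   /// explicit fsync of the current changelog file; entry stays at assigned_index
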
+size_t NuKeeperLogStore::start_index() const +{ + std::lock_guard lock(changelog_lock); + return changelog.getStartIndex(); +} + +void NuKeeperLogStore::init(size_t from_log_idx) +{ + std::lock_guard lock(changelog_lock); + changelog.readChangelogAndInitWriter(from_log_idx); +} + +size_t NuKeeperLogStore::next_slot() const +{ + std::lock_guard lock(changelog_lock); + return changelog.getNextEntryIndex(); +} + +nuraft::ptr NuKeeperLogStore::last_entry() const +{ + std::lock_guard lock(changelog_lock); + return changelog.getLastEntry(); +} + +size_t NuKeeperLogStore::append(nuraft::ptr & entry) +{ + std::lock_guard lock(changelog_lock); + size_t idx = changelog.getNextEntryIndex(); + changelog.appendEntry(idx, entry, force_sync); + return idx; +} + + +void NuKeeperLogStore::write_at(size_t index, nuraft::ptr & entry) +{ + std::lock_guard lock(changelog_lock); + changelog.writeAt(index, entry, force_sync); +} + +nuraft::ptr>> NuKeeperLogStore::log_entries(size_t start, size_t end) +{ + std::lock_guard lock(changelog_lock); + return changelog.getLogEntriesBetween(start, end); +} + +nuraft::ptr NuKeeperLogStore::entry_at(size_t index) +{ + std::lock_guard lock(changelog_lock); + return changelog.entryAt(index); +} + +size_t NuKeeperLogStore::term_at(size_t index) +{ + std::lock_guard lock(changelog_lock); + auto entry = changelog.entryAt(index); + if (entry) + return entry->get_term(); + return 0; +} + +nuraft::ptr NuKeeperLogStore::pack(size_t index, int32_t cnt) +{ + std::lock_guard lock(changelog_lock); + return changelog.serializeEntriesToBuffer(index, cnt); +} + +bool NuKeeperLogStore::compact(size_t last_log_index) +{ + std::lock_guard lock(changelog_lock); + changelog.compact(last_log_index); + return true; +} + +bool NuKeeperLogStore::flush() +{ + std::lock_guard lock(changelog_lock); + changelog.flush(); + return true; +} + +void NuKeeperLogStore::apply_pack(size_t index, nuraft::buffer & pack) +{ + std::lock_guard lock(changelog_lock); + changelog.applyEntriesFromBuffer(index, pack, force_sync); +} + +size_t NuKeeperLogStore::size() const +{ + std::lock_guard lock(changelog_lock); + return changelog.size(); +} + +} diff --git a/src/Coordination/NuKeeperLogStore.h b/src/Coordination/NuKeeperLogStore.h new file mode 100644 index 00000000000..a94b662fda4 --- /dev/null +++ b/src/Coordination/NuKeeperLogStore.h @@ -0,0 +1,52 @@ +#pragma once +#include // Y_IGNORE +#include +#include +#include +#include +#include + +namespace DB +{ + +class NuKeeperLogStore : public nuraft::log_store +{ +public: + NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_); + + void init(size_t from_log_idx); + + size_t start_index() const override; + + size_t next_slot() const override; + + nuraft::ptr last_entry() const override; + + size_t append(nuraft::ptr & entry) override; + + void write_at(size_t index, nuraft::ptr & entry) override; + + nuraft::ptr>> log_entries(size_t start, size_t end) override; + + nuraft::ptr entry_at(size_t index) override; + + size_t term_at(size_t index) override; + + nuraft::ptr pack(size_t index, int32_t cnt) override; + + void apply_pack(size_t index, nuraft::buffer & pack) override; + + bool compact(size_t last_log_index) override; + + bool flush() override; + + size_t size() const; + +private: + mutable std::mutex changelog_lock; + Poco::Logger * log; + Changelog changelog; + bool force_sync; +}; + +} diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp index 7464a06e86f..9ff1e2801c9 100644 --- 
a/src/Coordination/NuKeeperServer.cpp +++ b/src/Coordination/NuKeeperServer.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,17 +26,32 @@ NuKeeperServer::NuKeeperServer( : server_id(server_id_) , coordination_settings(coordination_settings_) , state_machine(nuraft::cs_new(responses_queue_, coordination_settings)) - , state_manager(nuraft::cs_new(server_id, "test_keeper_server.raft_configuration", config)) + , state_manager(nuraft::cs_new(server_id, "test_keeper_server", config, coordination_settings)) , responses_queue(responses_queue_) { } void NuKeeperServer::startup() { + + state_manager->loadLogStore(state_machine->last_commit_index()); + bool single_server = state_manager->getTotalServers() == 1; + nuraft::raft_params params; - params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds(); - params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(); - params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(); + if (single_server) + { + /// Don't make sense in single server mode + params.heart_beat_interval_ = 0; + params.election_timeout_lower_bound_ = 0; + params.election_timeout_upper_bound_ = 0; + } + else + { + params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds(); + params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(); + params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(); + } + params.reserved_log_items_ = coordination_settings->reserved_log_items; params.snapshot_distance_ = coordination_settings->snapshot_distance; params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds(); @@ -64,8 +79,10 @@ void NuKeeperServer::startup() void NuKeeperServer::shutdown() { state_machine->shutdownStorage(); - if (!launcher.shutdown(coordination_settings->shutdown_timeout.totalSeconds())) - LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5); + state_manager->flushLogStore(); + auto timeout = coordination_settings->shutdown_timeout.totalSeconds(); + if (!launcher.shutdown(timeout)) + LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", timeout); } namespace @@ -157,13 +174,38 @@ bool NuKeeperServer::isLeaderAlive() const nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */) { - if (type == nuraft::cb_func::Type::BecomeFresh || type == nuraft::cb_func::Type::BecomeLeader) + /// Only initial record + bool empty_store = state_manager->getLogStore()->size() == 1; + + auto set_initialized = [this] () { std::unique_lock lock(initialized_mutex); initialized_flag = true; initialized_cv.notify_all(); + }; + + switch (type) + { + case nuraft::cb_func::BecomeLeader: + { + if (empty_store) /// We become leader and store is empty, ready to serve requests + set_initialized(); + return nuraft::cb_func::ReturnCode::Ok; + } + case nuraft::cb_func::BecomeFresh: + { + set_initialized(); /// We are fresh follower, ready to serve requests. + return nuraft::cb_func::ReturnCode::Ok; + } + case nuraft::cb_func::InitialBatchCommited: + { + if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests. 
+ set_initialized(); + return nuraft::cb_func::ReturnCode::Ok; + } + default: /// ignore other events + return nuraft::cb_func::ReturnCode::Ok; } - return nuraft::cb_func::ReturnCode::Ok; } void NuKeeperServer::waitInit() diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h index a8d269eb9eb..40f3efec76a 100644 --- a/src/Coordination/NuKeeperServer.h +++ b/src/Coordination/NuKeeperServer.h @@ -2,7 +2,7 @@ #include // Y_IGNORE #include -#include +#include #include #include #include @@ -20,7 +20,7 @@ private: nuraft::ptr state_machine; - nuraft::ptr state_manager; + nuraft::ptr state_manager; nuraft::raft_launcher launcher; diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp index 0061645c75c..33f15fca09c 100644 --- a/src/Coordination/NuKeeperStateMachine.cpp +++ b/src/Coordination/NuKeeperStateMachine.cpp @@ -46,7 +46,7 @@ NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, co , storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds()) , responses_queue(responses_queue_) , last_committed_idx(0) - , log(&Poco::Logger::get("NuRaftStateMachine")) + , log(&Poco::Logger::get("NuKeeperStateMachine")) { LOG_DEBUG(log, "Created nukeeper state machine"); } diff --git a/src/Coordination/InMemoryStateManager.cpp b/src/Coordination/NuKeeperStateManager.cpp similarity index 66% rename from src/Coordination/InMemoryStateManager.cpp rename to src/Coordination/NuKeeperStateManager.cpp index 69e93578cc1..a7d8b345fee 100644 --- a/src/Coordination/InMemoryStateManager.cpp +++ b/src/Coordination/NuKeeperStateManager.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -9,30 +9,35 @@ namespace ErrorCodes extern const int RAFT_ERROR; } -InMemoryStateManager::InMemoryStateManager(int server_id_, const std::string & host, int port) +NuKeeperStateManager::NuKeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path) : my_server_id(server_id_) , my_port(port) - , log_store(nuraft::cs_new()) + , log_store(nuraft::cs_new(logs_path, 5000, false)) , cluster_config(nuraft::cs_new()) { auto peer_config = nuraft::cs_new(my_server_id, host + ":" + std::to_string(port)); cluster_config->get_servers().push_back(peer_config); } -InMemoryStateManager::InMemoryStateManager( +NuKeeperStateManager::NuKeeperStateManager( int my_server_id_, const std::string & config_prefix, - const Poco::Util::AbstractConfiguration & config) + const Poco::Util::AbstractConfiguration & config, + const CoordinationSettingsPtr & coordination_settings) : my_server_id(my_server_id_) - , log_store(nuraft::cs_new()) + , log_store(nuraft::cs_new( + config.getString(config_prefix + ".log_storage_path"), + coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync)) , cluster_config(nuraft::cs_new()) { + Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); + config.keys(config_prefix + ".raft_configuration", keys); + total_servers = keys.size(); for (const auto & server_key : keys) { - std::string full_prefix = config_prefix + "." + server_key; + std::string full_prefix = config_prefix + ".raft_configuration." 
+ server_key; int server_id = config.getInt(full_prefix + ".id"); std::string hostname = config.getString(full_prefix + ".hostname"); int port = config.getInt(full_prefix + ".port"); @@ -53,13 +58,23 @@ InMemoryStateManager::InMemoryStateManager( cluster_config->get_servers().push_back(peer_config); } if (!my_server_config) - throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section"); + throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id); if (start_as_follower_servers.size() == cluster_config->get_servers().size()) throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without )"); } -void InMemoryStateManager::save_config(const nuraft::cluster_config & config) +void NuKeeperStateManager::loadLogStore(size_t start_log_index) +{ + log_store->init(start_log_index); +} + +void NuKeeperStateManager::flushLogStore() +{ + log_store->flush(); +} + +void NuKeeperStateManager::save_config(const nuraft::cluster_config & config) { // Just keep in memory in this example. // Need to write to disk here, if want to make it durable. @@ -67,7 +82,7 @@ void InMemoryStateManager::save_config(const nuraft::cluster_config & config) cluster_config = nuraft::cluster_config::deserialize(*buf); } -void InMemoryStateManager::save_state(const nuraft::srv_state & state) +void NuKeeperStateManager::save_state(const nuraft::srv_state & state) { // Just keep in memory in this example. // Need to write to disk here, if want to make it durable. diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/NuKeeperStateManager.h similarity index 66% rename from src/Coordination/InMemoryStateManager.h rename to src/Coordination/NuKeeperStateManager.h index 2a5c2f00dba..c84b0918beb 100644 --- a/src/Coordination/InMemoryStateManager.h +++ b/src/Coordination/NuKeeperStateManager.h @@ -2,25 +2,32 @@ #include #include -#include +#include +#include #include // Y_IGNORE #include namespace DB { -class InMemoryStateManager : public nuraft::state_mgr +class NuKeeperStateManager : public nuraft::state_mgr { public: - InMemoryStateManager( + NuKeeperStateManager( int server_id_, const std::string & config_prefix, - const Poco::Util::AbstractConfiguration & config); + const Poco::Util::AbstractConfiguration & config, + const CoordinationSettingsPtr & coordination_settings); - InMemoryStateManager( + NuKeeperStateManager( int server_id_, const std::string & host, - int port); + int port, + const std::string & logs_path); + + void loadLogStore(size_t start_log_index); + + void flushLogStore(); nuraft::ptr load_config() override { return cluster_config; } @@ -45,11 +52,16 @@ public: return start_as_follower_servers.count(my_server_id); } + nuraft::ptr getLogStore() const { return log_store; } + + size_t getTotalServers() const { return total_servers; } + private: int my_server_id; int my_port; + size_t total_servers{0}; std::unordered_set start_as_follower_servers; - nuraft::ptr log_store; + nuraft::ptr log_store; nuraft::ptr my_server_config; nuraft::ptr cluster_config; nuraft::ptr server_state; diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp index 631f975cddc..bb433474dc9 100644 --- a/src/Coordination/NuKeeperStorage.cpp +++ b/src/Coordination/NuKeeperStorage.cpp @@ -25,10 +25,10 @@ static String parentPath(const String & path) return "/"; } -static String baseName(const String & path) +static std::string getBaseName(const String & path) { - 
auto rslash_pos = path.rfind('/'); - return path.substr(rslash_pos + 1); + size_t basename_start = path.rfind('/'); + return std::string{&path[basename_start + 1], path.length() - basename_start - 1}; } static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches, Coordination::Event event_type) @@ -167,14 +167,17 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest /// Increment sequential number even if node is not sequential ++it->second.seq_num; - response.path_created = path_created; + container.emplace(path_created, std::move(created_node)); + auto child_path = getBaseName(path_created); + it->second.children.insert(child_path); + if (request.is_ephemeral) ephemerals[session_id].emplace(path_created); - undo = [&container, &ephemerals, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path = it->first] + undo = [&container, &ephemerals, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path = it->first, child_path] { container.erase(path_created); if (is_ephemeral) @@ -183,6 +186,7 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest --undo_parent.stat.cversion; --undo_parent.stat.numChildren; --undo_parent.seq_num; + undo_parent.children.erase(child_path); }; ++it->second.stat.cversion; @@ -250,13 +254,16 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest if (prev_node.is_ephemeral) ephemerals[session_id].erase(request.path); - container.erase(it); + auto child_basename = getBaseName(it->first); auto & parent = container.at(parentPath(request.path)); --parent.stat.numChildren; ++parent.stat.cversion; + parent.children.erase(child_basename); response.error = Coordination::Error::ZOK; - undo = [prev_node, &container, &ephemerals, session_id, path = request.path] + container.erase(it); + + undo = [prev_node, &container, &ephemerals, session_id, path = request.path, child_basename] { if (prev_node.is_ephemeral) ephemerals[session_id].emplace(path); @@ -265,6 +272,7 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest auto & undo_parent = container.at(parentPath(path)); ++undo_parent.stat.numChildren; --undo_parent.stat.cversion; + undo_parent.children.insert(child_basename); }; } @@ -370,17 +378,9 @@ struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest if (path_prefix.empty()) throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR); - if (path_prefix.back() != '/') - path_prefix += '/'; + response.names.insert(response.names.end(), it->second.children.begin(), it->second.children.end()); - /// Fairly inefficient. 
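// Self-contained illustration (not the storage code itself) of the path helpers and of how the
// per-node children sets stay in sync on create/remove. The simplified parent -> children map and
// the "/a/b" path are assumptions for the example; the real code stores the set inside each Node.
#include <cassert>
#include <string>
#include <unordered_map>
#include <unordered_set>

static std::string parentOf(const std::string & path)    /// mirrors parentPath(): "/a/b" -> "/a", "/a" -> "/"
{
    auto pos = path.rfind('/');
    return pos > 0 ? path.substr(0, pos) : "/";
}

static std::string baseOf(const std::string & path)       /// mirrors getBaseName(): "/a/b" -> "b"
{
    return path.substr(path.rfind('/') + 1);
}

int main()
{
    std::unordered_map<std::string, std::unordered_set<std::string>> children;   /// parent path -> child names

    /// create "/a/b": besides inserting the node itself, add its base name to the parent's set
    children[parentOf("/a/b")].insert(baseOf("/a/b"));
    assert(children["/a"].count("b"));

    /// remove "/a/b": erase it again, so list("/a") stays correct even though the container
    /// becomes an unordered_map (see NuKeeperStorage.h below) and can no longer be scanned by key prefix
    children[parentOf("/a/b")].erase(baseOf("/a/b"));
    assert(children["/a"].empty());
}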
- for (auto child_it = container.upper_bound(path_prefix); - child_it != container.end() && startsWith(child_it->first, path_prefix); - ++child_it) - { - if (parentPath(child_it->first) == request.path) - response.names.emplace_back(baseName(child_it->first)); - } + std::sort(response.names.begin(), response.names.end()); response.stat = it->second.stat; response.error = Coordination::Error::ZOK; diff --git a/src/Coordination/NuKeeperStorage.h b/src/Coordination/NuKeeperStorage.h index 20ab1982b4e..1a2e6202bf0 100644 --- a/src/Coordination/NuKeeperStorage.h +++ b/src/Coordination/NuKeeperStorage.h @@ -16,6 +16,7 @@ using namespace DB; struct NuKeeperStorageRequest; using NuKeeperStorageRequestPtr = std::shared_ptr; using ResponseCallback = std::function; +using ChildrenSet = std::unordered_set; class NuKeeperStorage { @@ -30,6 +31,7 @@ public: bool is_sequental = false; Coordination::Stat stat{}; int32_t seq_num = 0; + ChildrenSet children{}; }; struct ResponseForSession @@ -48,9 +50,9 @@ public: using RequestsForSessions = std::vector; - using Container = std::map; - using Ephemerals = std::unordered_map>; - using SessionAndWatcher = std::unordered_map>; + using Container = std::unordered_map; + using Ephemerals = std::unordered_map>; + using SessionAndWatcher = std::unordered_map>; using SessionAndTimeout = std::unordered_map; using SessionIDs = std::vector; diff --git a/src/Coordination/NuKeeperStorageSerializer.cpp b/src/Coordination/NuKeeperStorageSerializer.cpp index 298df45cde0..c29d0d1f1fa 100644 --- a/src/Coordination/NuKeeperStorageSerializer.cpp +++ b/src/Coordination/NuKeeperStorageSerializer.cpp @@ -59,13 +59,16 @@ void NuKeeperStorageSerializer::deserialize(NuKeeperStorage & storage, ReadBuffe size_t container_size; Coordination::read(container_size, in); - while (storage.container.size() < container_size) + + size_t current_size = 0; + while (current_size < container_size) { std::string path; Coordination::read(path, in); NuKeeperStorage::Node node; readNode(node, in); storage.container[path] = node; + current_size++; } size_t ephemerals_size; Coordination::read(ephemerals_size, in); diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp index ed9777350c5..37517808ef0 100644 --- a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_for_build.cpp @@ -6,9 +6,10 @@ #endif #if USE_NURAFT - +#include +#include #include -#include +#include #include #include #include @@ -20,9 +21,35 @@ #include #include #include +#include #include // Y_IGNORE #include +#include +#include +#include +namespace fs = std::filesystem; +struct ChangelogDirTest +{ + std::string path; + bool drop; + explicit ChangelogDirTest(std::string path_, bool drop_ = true) + : path(path_) + , drop(drop_) + { + if (fs::exists(path)) + { + EXPECT_TRUE(false) << "Path " << path << " already exists, remove it to run test"; + } + fs::create_directory(path); + } + + ~ChangelogDirTest() + { + if (fs::exists(path) && drop) + fs::remove_all(path); + } +}; TEST(CoordinationTest, BuildTest) { @@ -67,14 +94,15 @@ TEST(CoordinationTest, BufferSerde) template struct SimpliestRaftServer { - SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_) + SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_, const std::string & logs_path) : server_id(server_id_) , hostname(hostname_) , port(port_) , endpoint(hostname + ":" + std::to_string(port)) , state_machine(nuraft::cs_new()) - , state_manager(nuraft::cs_new(server_id, 
hostname, port)) + , state_manager(nuraft::cs_new(server_id, hostname, port, logs_path)) { + state_manager->loadLogStore(1); nuraft::raft_params params; params.heart_beat_interval_ = 100; params.election_timeout_lower_bound_ = 200; @@ -90,10 +118,10 @@ struct SimpliestRaftServer if (!raft_instance) { - std::cerr << "Failed to initialize launcher (see the message " - "in the log file)." << std::endl; + std::cerr << "Failed to initialize launcher" << std::endl; exit(-1); } + std::cout << "init Raft instance " << server_id; for (size_t ii = 0; ii < 20; ++ii) { @@ -123,7 +151,7 @@ struct SimpliestRaftServer nuraft::ptr state_machine; // State manager. - nuraft::ptr state_manager; + nuraft::ptr state_manager; // Raft launcher. nuraft::raft_launcher launcher; @@ -134,11 +162,10 @@ struct SimpliestRaftServer using SummingRaftServer = SimpliestRaftServer; -nuraft::ptr getLogEntry(int64_t number) +nuraft::ptr getBuffer(int64_t number) { nuraft::ptr ret = nuraft::buffer::alloc(sizeof(number)); nuraft::buffer_serializer bs(ret); - // WARNING: We don't consider endian-safety in this example. bs.put_raw(&number, sizeof(number)); return ret; } @@ -146,12 +173,13 @@ nuraft::ptr getLogEntry(int64_t number) TEST(CoordinationTest, TestSummingRaft1) { - SummingRaftServer s1(1, "localhost", 44444); + ChangelogDirTest test("./logs"); + SummingRaftServer s1(1, "localhost", 44444, "./logs"); /// Single node is leader EXPECT_EQ(s1.raft_instance->get_leader(), 1); - auto entry1 = getLogEntry(143); + auto entry1 = getBuffer(143); auto ret = s1.raft_instance->append_entries({entry1}); EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code(); EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code(); @@ -167,110 +195,6 @@ TEST(CoordinationTest, TestSummingRaft1) s1.launcher.shutdown(5); } -TEST(CoordinationTest, TestSummingRaft3) -{ - SummingRaftServer s1(1, "localhost", 44444); - SummingRaftServer s2(2, "localhost", 44445); - SummingRaftServer s3(3, "localhost", 44446); - - nuraft::srv_config first_config(1, "localhost:44444"); - auto ret1 = s2.raft_instance->add_srv(first_config); - if (!ret1->get_accepted()) - { - std::cout << "failed to add server: " - << ret1->get_result_str() << std::endl; - EXPECT_TRUE(false); - } - - while (s1.raft_instance->get_leader() != 2) - { - std::cout << "Waiting s1 to join to s2 quorum\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - nuraft::srv_config third_config(3, "localhost:44446"); - auto ret3 = s2.raft_instance->add_srv(third_config); - if (!ret3->get_accepted()) - { - std::cout << "failed to add server: " - << ret3->get_result_str() << std::endl; - EXPECT_TRUE(false); - } - - while (s3.raft_instance->get_leader() != 2) - { - std::cout << "Waiting s3 to join to s2 quorum\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - /// S2 is leader - EXPECT_EQ(s1.raft_instance->get_leader(), 2); - EXPECT_EQ(s2.raft_instance->get_leader(), 2); - EXPECT_EQ(s3.raft_instance->get_leader(), 2); - - std::cerr << "Starting to add entries\n"; - auto entry = getLogEntry(1); - auto ret = s2.raft_instance->append_entries({entry}); - EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code(); - EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code(); - - while (s1.state_machine->getValue() != 1) - { - std::cout << "Waiting s1 to apply entry\n"; - 
std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - while (s2.state_machine->getValue() != 1) - { - std::cout << "Waiting s2 to apply entry\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - while (s3.state_machine->getValue() != 1) - { - std::cout << "Waiting s3 to apply entry\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - EXPECT_EQ(s1.state_machine->getValue(), 1); - EXPECT_EQ(s2.state_machine->getValue(), 1); - EXPECT_EQ(s3.state_machine->getValue(), 1); - - auto non_leader_entry = getLogEntry(3); - auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry}); - - EXPECT_FALSE(ret_non_leader1->get_accepted()); - - auto ret_non_leader3 = s3.raft_instance->append_entries({non_leader_entry}); - - EXPECT_FALSE(ret_non_leader3->get_accepted()); - - auto leader_entry = getLogEntry(77); - auto ret_leader = s2.raft_instance->append_entries({leader_entry}); - EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate: entry 78" << ret_leader->get_result_code(); - EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 78" << ret_leader->get_result_code(); - - while (s1.state_machine->getValue() != 78) - { - std::cout << "Waiting s1 to apply entry\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - while (s3.state_machine->getValue() != 78) - { - std::cout << "Waiting s3 to apply entry\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - EXPECT_EQ(s1.state_machine->getValue(), 78); - EXPECT_EQ(s2.state_machine->getValue(), 78); - EXPECT_EQ(s3.state_machine->getValue(), 78); - - s1.launcher.shutdown(5); - s2.launcher.shutdown(5); - s3.launcher.shutdown(5); -} - nuraft::ptr getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request) { DB::WriteBufferFromNuraftBuffer buf; @@ -333,4 +257,586 @@ TEST(CoordinationTest, TestStorageSerialization) EXPECT_EQ(new_storage.ephemerals[1].size(), 1); } +DB::LogEntryPtr getLogEntry(const std::string & s, size_t term) +{ + DB::WriteBufferFromNuraftBuffer bufwriter; + writeText(s, bufwriter); + return nuraft::cs_new(term, bufwriter.getBuffer()); +} + +TEST(CoordinationTest, ChangelogTestSimple) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + auto entry = getLogEntry("hello world", 77); + changelog.append(entry); + EXPECT_EQ(changelog.next_slot(), 2); + EXPECT_EQ(changelog.start_index(), 1); + EXPECT_EQ(changelog.last_entry()->get_term(), 77); + EXPECT_EQ(changelog.entry_at(1)->get_term(), 77); + EXPECT_EQ(changelog.log_entries(1, 2)->size(), 1); +} + +TEST(CoordinationTest, ChangelogTestFile) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + auto entry = getLogEntry("hello world", 77); + changelog.append(entry); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + for (const auto & p : fs::directory_iterator("./logs")) + EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin"); + + changelog.append(entry); + changelog.append(entry); + changelog.append(entry); + changelog.append(entry); + changelog.append(entry); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); +} + +TEST(CoordinationTest, ChangelogReadWrite) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 1000, true); + changelog.init(1); + for (size_t i = 0; i < 10; ++i) + { + auto entry 
= getLogEntry("hello world", i * 10); + changelog.append(entry); + } + EXPECT_EQ(changelog.size(), 10); + DB::NuKeeperLogStore changelog_reader("./logs", 1000, true); + changelog_reader.init(1); + EXPECT_EQ(changelog_reader.size(), 10); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); + EXPECT_EQ(changelog_reader.start_index(), changelog.start_index()); + EXPECT_EQ(changelog_reader.next_slot(), changelog.next_slot()); + + for (size_t i = 0; i < 10; ++i) + EXPECT_EQ(changelog_reader.entry_at(i + 1)->get_term(), changelog.entry_at(i + 1)->get_term()); + + auto entries_from_range_read = changelog_reader.log_entries(1, 11); + auto entries_from_range = changelog.log_entries(1, 11); + EXPECT_EQ(entries_from_range_read->size(), entries_from_range->size()); + EXPECT_EQ(10, entries_from_range->size()); +} + +TEST(CoordinationTest, ChangelogWriteAt) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 1000, true); + changelog.init(1); + for (size_t i = 0; i < 10; ++i) + { + auto entry = getLogEntry("hello world", i * 10); + changelog.append(entry); + } + EXPECT_EQ(changelog.size(), 10); + + auto entry = getLogEntry("writer", 77); + changelog.write_at(7, entry); + EXPECT_EQ(changelog.size(), 7); + EXPECT_EQ(changelog.last_entry()->get_term(), 77); + EXPECT_EQ(changelog.entry_at(7)->get_term(), 77); + EXPECT_EQ(changelog.next_slot(), 8); + + DB::NuKeeperLogStore changelog_reader("./logs", 1000, true); + changelog_reader.init(1); + + EXPECT_EQ(changelog_reader.size(), changelog.size()); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); + EXPECT_EQ(changelog_reader.start_index(), changelog.start_index()); + EXPECT_EQ(changelog_reader.next_slot(), changelog.next_slot()); +} + + +TEST(CoordinationTest, ChangelogTestAppendAfterRead) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + for (size_t i = 0; i < 7; ++i) + { + auto entry = getLogEntry("hello world", i * 10); + changelog.append(entry); + } + + EXPECT_EQ(changelog.size(), 7); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + + DB::NuKeeperLogStore changelog_reader("./logs", 5, true); + changelog_reader.init(1); + + EXPECT_EQ(changelog_reader.size(), 7); + for (size_t i = 7; i < 10; ++i) + { + auto entry = getLogEntry("hello world", i * 10); + changelog_reader.append(entry); + } + EXPECT_EQ(changelog_reader.size(), 10); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + + size_t logs_count = 0; + for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs")) + logs_count++; + + EXPECT_EQ(logs_count, 2); + + auto entry = getLogEntry("someentry", 77); + changelog_reader.append(entry); + EXPECT_EQ(changelog_reader.size(), 11); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + + logs_count = 0; + for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs")) + logs_count++; + + EXPECT_EQ(logs_count, 3); +} + +TEST(CoordinationTest, ChangelogTestCompaction) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + + for (size_t i = 0; i < 3; ++i) + { + auto entry = getLogEntry("hello world", i * 10); + changelog.append(entry); + } + + EXPECT_EQ(changelog.size(), 3); + + 
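// The file names asserted in these tests follow a fixed pattern; this helper summarizes it.
// It is derived from the expected names (changelog_1_5.bin, changelog_6_10.bin, ...), not copied
// from Changelog.cpp, and assumes numbering starts at index 1 without prior truncation.
#include <string>

std::string changelogFileName(size_t entry_index, size_t rotate_interval)
{
    /// With rotate_interval = 5: indices 1..5 -> changelog_1_5.bin, 6..10 -> changelog_6_10.bin, ...
    size_t file_start = ((entry_index - 1) / rotate_interval) * rotate_interval + 1;
    size_t file_end = file_start + rotate_interval - 1;
    return "changelog_" + std::to_string(file_start) + "_" + std::to_string(file_end) + ".bin";
}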
changelog.compact(2); + + EXPECT_EQ(changelog.size(), 1); + EXPECT_EQ(changelog.start_index(), 3); + EXPECT_EQ(changelog.next_slot(), 4); + EXPECT_EQ(changelog.last_entry()->get_term(), 20); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + + auto e1 = getLogEntry("hello world", 30); + changelog.append(e1); + auto e2 = getLogEntry("hello world", 40); + changelog.append(e2); + auto e3 = getLogEntry("hello world", 50); + changelog.append(e3); + auto e4 = getLogEntry("hello world", 60); + changelog.append(e4); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + + changelog.compact(6); + + EXPECT_FALSE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + + EXPECT_EQ(changelog.size(), 1); + EXPECT_EQ(changelog.start_index(), 7); + EXPECT_EQ(changelog.next_slot(), 8); + EXPECT_EQ(changelog.last_entry()->get_term(), 60); + /// And we able to read it + DB::NuKeeperLogStore changelog_reader("./logs", 5, true); + changelog_reader.init(7); + EXPECT_EQ(changelog_reader.size(), 1); + EXPECT_EQ(changelog_reader.start_index(), 7); + EXPECT_EQ(changelog_reader.next_slot(), 8); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), 60); +} + +TEST(CoordinationTest, ChangelogTestBatchOperations) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 100, true); + changelog.init(1); + for (size_t i = 0; i < 10; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + + EXPECT_EQ(changelog.size(), 10); + + auto entries = changelog.pack(1, 5); + + DB::NuKeeperLogStore apply_changelog("./logs", 100, true); + apply_changelog.init(1); + + for (size_t i = 0; i < 10; ++i) + { + EXPECT_EQ(apply_changelog.entry_at(i + 1)->get_term(), i * 10); + } + EXPECT_EQ(apply_changelog.size(), 10); + + apply_changelog.apply_pack(8, *entries); + + EXPECT_EQ(apply_changelog.size(), 12); + EXPECT_EQ(apply_changelog.start_index(), 1); + EXPECT_EQ(apply_changelog.next_slot(), 13); + + for (size_t i = 0; i < 7; ++i) + { + EXPECT_EQ(apply_changelog.entry_at(i + 1)->get_term(), i * 10); + } + + EXPECT_EQ(apply_changelog.entry_at(8)->get_term(), 0); + EXPECT_EQ(apply_changelog.entry_at(9)->get_term(), 10); + EXPECT_EQ(apply_changelog.entry_at(10)->get_term(), 20); + EXPECT_EQ(apply_changelog.entry_at(11)->get_term(), 30); + EXPECT_EQ(apply_changelog.entry_at(12)->get_term(), 40); +} + +TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 100, true); + changelog.init(1); + for (size_t i = 0; i < 10; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + + EXPECT_EQ(changelog.size(), 10); + + auto entries = changelog.pack(5, 5); + + ChangelogDirTest test1("./logs1"); + DB::NuKeeperLogStore changelog_new("./logs1", 100, true); + changelog_new.init(1); + EXPECT_EQ(changelog_new.size(), 0); + + changelog_new.apply_pack(5, *entries); + + EXPECT_EQ(changelog_new.size(), 5); + EXPECT_EQ(changelog_new.start_index(), 5); + EXPECT_EQ(changelog_new.next_slot(), 10); + + for (size_t i = 4; i < 9; ++i) + EXPECT_EQ(changelog_new.entry_at(i + 1)->get_term(), i * 10); + + auto e = getLogEntry("hello_world", 110); + changelog_new.append(e); + EXPECT_EQ(changelog_new.size(), 6); + EXPECT_EQ(changelog_new.start_index(), 5); + EXPECT_EQ(changelog_new.next_slot(), 11); + + DB::NuKeeperLogStore changelog_reader("./logs1", 
100, true); + changelog_reader.init(5); +} + + +TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + + for (size_t i = 0; i < 33; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + + EXPECT_EQ(changelog.size(), 33); + + auto e1 = getLogEntry("helloworld", 5555); + changelog.write_at(7, e1); + EXPECT_EQ(changelog.size(), 7); + EXPECT_EQ(changelog.start_index(), 1); + EXPECT_EQ(changelog.next_slot(), 8); + EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin")); + + DB::NuKeeperLogStore changelog_read("./logs", 5, true); + changelog_read.init(1); + EXPECT_EQ(changelog_read.size(), 7); + EXPECT_EQ(changelog_read.start_index(), 1); + EXPECT_EQ(changelog_read.next_slot(), 8); + EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); +} + +TEST(CoordinationTest, ChangelogTestWriteAtFileBorder) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + + for (size_t i = 0; i < 33; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + + EXPECT_EQ(changelog.size(), 33); + + auto e1 = getLogEntry("helloworld", 5555); + changelog.write_at(11, e1); + EXPECT_EQ(changelog.size(), 11); + EXPECT_EQ(changelog.start_index(), 1); + EXPECT_EQ(changelog.next_slot(), 12); + EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin")); + + DB::NuKeeperLogStore changelog_read("./logs", 5, true); + changelog_read.init(1); + EXPECT_EQ(changelog_read.size(), 11); + EXPECT_EQ(changelog_read.start_index(), 1); + EXPECT_EQ(changelog_read.next_slot(), 12); + EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); +} + +TEST(CoordinationTest, ChangelogTestWriteAtAllFiles) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + 
changelog.init(1); + + for (size_t i = 0; i < 33; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + + EXPECT_EQ(changelog.size(), 33); + + auto e1 = getLogEntry("helloworld", 5555); + changelog.write_at(1, e1); + EXPECT_EQ(changelog.size(), 1); + EXPECT_EQ(changelog.start_index(), 1); + EXPECT_EQ(changelog.next_slot(), 2); + EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + + EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin")); +} + +TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead) +{ + ChangelogDirTest test("./logs"); + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + + for (size_t i = 0; i < 35; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + EXPECT_EQ(changelog.size(), 35); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin")); + + + DB::NuKeeperLogStore changelog_reader("./logs", 5, true); + changelog_reader.init(1); + + auto entry = getLogEntry("36_hello_world", 360); + changelog_reader.append(entry); + + EXPECT_EQ(changelog_reader.size(), 36); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin")); +} + + +TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) +{ + ChangelogDirTest test("./logs"); + + DB::NuKeeperLogStore changelog("./logs", 5, true); + changelog.init(1); + + for (size_t i = 0; i < 35; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + EXPECT_EQ(changelog.size(), 35); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin")); + + DB::WriteBufferFromFile 
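/// Recovery behaviour these truncate tests exercise (a summary of the expectations below, not a
/// separate guarantee): when a changelog file turns out to be corrupted part-way through, the
/// entries read up to the corruption point are kept, the rest of that file and all later changelog
/// files are discarded, and subsequent appends continue from the broken position.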
plain_buf("./logs/changelog_11_15.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + plain_buf.truncate(0); + + DB::NuKeeperLogStore changelog_reader("./logs", 5, true); + changelog_reader.init(1); + + EXPECT_EQ(changelog_reader.size(), 10); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), 90); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin")); + + auto entry = getLogEntry("h", 7777); + changelog_reader.append(entry); + EXPECT_EQ(changelog_reader.size(), 11); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); + + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin")); + + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin")); + + DB::NuKeeperLogStore changelog_reader2("./logs", 5, true); + changelog_reader2.init(1); + EXPECT_EQ(changelog_reader2.size(), 11); + EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); +} + +TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) +{ + ChangelogDirTest test("./logs"); + + DB::NuKeeperLogStore changelog("./logs", 20, true); + changelog.init(1); + + for (size_t i = 0; i < 35; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10); + changelog.append(entry); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin")); + + DB::WriteBufferFromFile plain_buf("./logs/changelog_1_20.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + plain_buf.truncate(140); + + DB::NuKeeperLogStore changelog_reader("./logs", 20, true); + changelog_reader.init(1); + + EXPECT_EQ(changelog_reader.size(), 2); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), 450); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_21_40.bin")); + auto entry = getLogEntry("hello_world", 7777); + changelog_reader.append(entry); + EXPECT_EQ(changelog_reader.size(), 3); + EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); + + + DB::NuKeeperLogStore changelog_reader2("./logs", 20, true); + changelog_reader2.init(1); + EXPECT_EQ(changelog_reader2.size(), 3); + EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); +} + +TEST(CoordinationTest, ChangelogTestLostFiles) +{ + ChangelogDirTest test("./logs"); + + DB::NuKeeperLogStore changelog("./logs", 20, true); + changelog.init(1); + + for (size_t i = 0; i < 35; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10); + changelog.append(entry); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin")); + + fs::remove("./logs/changelog_1_20.bin"); + + DB::NuKeeperLogStore changelog_reader("./logs", 20, true); + EXPECT_THROW(changelog_reader.init(5), DB::Exception); + + fs::remove("./logs/changelog_21_40.bin"); + EXPECT_THROW(changelog_reader.init(3), DB::Exception); +} + +int main(int argc, char ** argv) +{ + 
Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + Poco::Logger::root().setLevel("trace"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + #endif diff --git a/src/Core/Defines.h b/src/Core/Defines.h index ff033aa6183..8fd8e0d6bdf 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -11,6 +11,9 @@ #define DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_SECURE_MS 100 #define DBMS_DEFAULT_SEND_TIMEOUT_SEC 300 #define DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC 300 +/// Timeouts for hedged requests. +#define DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS 100 +#define DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_SEC 2 /// Timeout for synchronous request-result protocol call (like Ping or TablesStatus). #define DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC 5 #define DBMS_DEFAULT_POLL_INTERVAL 10 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1de89aa6047..8afc08da21a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -55,6 +55,10 @@ class IColumn; M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \ M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \ M(Seconds, tcp_keep_alive_timeout, 0, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \ + M(Milliseconds, hedged_connection_timeout, DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS, "Connection timeout for establishing connection with replica for Hedged requests", 0) \ + M(Seconds, receive_data_timeout, DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_SEC, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \ + M(Bool, use_hedged_requests, true, "Use hedged requests for distributed queries", 0) \ + M(Bool, allow_changing_replica_until_first_data_packet, false, "Allow HedgedConnections to change replica until receiving first data packet", 0) \ M(Milliseconds, queue_max_wait_ms, 0, "The wait time in the request queue, if the number of concurrent requests exceeds the maximum.", 0) \ M(Milliseconds, connection_pool_max_wait_ms, 0, "The wait time when the connection pool is full.", 0) \ M(Milliseconds, replace_running_query_max_wait_ms, 5000, "The wait time for running query with the same query_id to finish when setting 'replace_running_query' is active.", 0) \ @@ -215,6 +219,10 @@ class IColumn; M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \ M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \ \ + /** Settings for testing hedged requests */ \ + M(Int64, sleep_in_send_tables_status, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ + M(Int64, sleep_in_send_data, 0, "Time to sleep in sending data in TCPHandler", 0) \ + \ M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \ M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \ M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \ @@ -437,6 +445,7 @@ class IColumn; M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. 
If empty, query without Union Mode will throw exception.", 0) \ M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ + M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index a967ee28502..3b0b4db72f9 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -1,12 +1,15 @@ -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -15,7 +18,7 @@ namespace DB namespace ErrorCodes { extern const int VIOLATED_CONSTRAINT; - extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; } @@ -48,62 +51,75 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); - if (!isUInt8(res_column.type)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", + auto result_type = removeNullable(removeLowCardinality(res_column.type)); + + if (!isUInt8(result_type)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Constraint {} does not return a value of type UInt8", backQuote(constraint_ptr->name)); - if (const ColumnConst * res_const = typeid_cast(res_column.column.get())) - { - UInt8 value = res_const->getValue(); + auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); - /// Is violated. - if (!value) - { - throw Exception(ErrorCodes::VIOLATED_CONSTRAINT, - "Constraint {} for table {} is violated, because it is a constant expression returning 0. " - "It is most likely an error in table definition.", - backQuote(constraint_ptr->name), table_id.getNameForLogs()); - } + if (const auto * column_nullable = checkAndGetColumn(*result_column)) + { + const auto & nested_column = column_nullable->getNestedColumnPtr(); + + /// Check if constraint value is nullable + const auto & null_map = column_nullable->getNullMapColumn(); + const PaddedPODArray & data = null_map.getData(); + bool null_map_contains_null = !memoryIsZero(data.raw_data(), data.size() * sizeof(UInt8)); + + if (null_map_contains_null) + throw Exception( + ErrorCodes::VIOLATED_CONSTRAINT, + "Constraint {} for table {} is violated. Expression: ({})."\ + "Constraint expression returns nullable column that contains null value", + backQuote(constraint_ptr->name), + table_id.getNameForLogs(), + serializeAST(*(constraint_ptr->expr), true)); + + result_column = nested_column; } - else + + const ColumnUInt8 & res_column_uint8 = assert_cast(*result_column); + + const UInt8 * data = res_column_uint8.getData().data(); + size_t size = res_column_uint8.size(); + + /// Is violated. 
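/// Concrete case handled above (hypothetical constraint): for `CHECK x > 0` with `x Nullable(Int32)`
/// the expression yields Nullable(UInt8), so the type is unwrapped with removeNullable/removeLowCardinality,
/// a set bit in the null map is reported as a violation, and result_column now points to the nested
/// UInt8 data - which is why the plain all-ones scan below is sufficient.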
+ if (!memoryIsByte(data, size, 1)) { - const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); + size_t row_idx = 0; + for (; row_idx < size; ++row_idx) + if (data[row_idx] != 1) + break; - const UInt8 * data = res_column_uint8.getData().data(); - size_t size = res_column_uint8.size(); + Names related_columns = constraint_expr->getRequiredColumns(); - /// Is violated. - if (!memoryIsByte(data, size, 1)) + bool first = true; + String column_values_msg; + constexpr size_t approx_bytes_for_col = 32; + column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); + for (const auto & name : related_columns) { - size_t row_idx = 0; - for (; row_idx < size; ++row_idx) - if (data[row_idx] != 1) - break; + const IColumn & column = *block.getByName(name).column; + assert(row_idx < column.size()); - Names related_columns = constraint_expr->getRequiredColumns(); - - bool first = true; - String column_values_msg; - constexpr size_t approx_bytes_for_col = 32; - column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); - for (const auto & name : related_columns) - { - const IColumn & column = *block.getByName(name).column; - assert(row_idx < column.size()); - - if (!first) - column_values_msg.append(", "); - column_values_msg.append(backQuoteIfNeed(name)); - column_values_msg.append(" = "); - column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx])); - first = false; - } - - throw Exception(ErrorCodes::VIOLATED_CONSTRAINT, - "Constraint {} for table {} is violated at row {}. Expression: ({}). Column values: {}", - backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1, - serializeAST(*(constraint_ptr->expr), true), column_values_msg); + if (!first) + column_values_msg.append(", "); + column_values_msg.append(backQuoteIfNeed(name)); + column_values_msg.append(" = "); + column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx])); + first = false; } + + throw Exception( + ErrorCodes::VIOLATED_CONSTRAINT, + "Constraint {} for table {} is violated at row {}. Expression: ({}). 
Column values: {}", + backQuote(constraint_ptr->name), + table_id.getNameForLogs(), + rows_written + row_idx + 1, + serializeAST(*(constraint_ptr->expr), true), + column_values_msg); } } } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index fc3870b3f22..9cac638ceb5 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include namespace DB @@ -31,23 +33,23 @@ RemoteQueryExecutor::RemoteQueryExecutor( : header(header_), query(query_), context(context_) , scalars(scalars_), external_tables(external_tables_), stage(stage_) { - create_multiplexed_connections = [this, &connection, throttler]() + create_connections = [this, &connection, throttler]() { return std::make_unique(connection, context.getSettingsRef(), throttler); }; } RemoteQueryExecutor::RemoteQueryExecutor( - std::vector && connections, + std::vector && connections_, const String & query_, const Block & header_, const Context & context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) : header(header_), query(query_), context(context_) , scalars(scalars_), external_tables(external_tables_), stage(stage_) { - create_multiplexed_connections = [this, connections, throttler]() mutable + create_connections = [this, connections_, throttler]() mutable { return std::make_unique( - std::move(connections), context.getSettingsRef(), throttler); + std::move(connections_), context.getSettingsRef(), throttler); }; } @@ -58,23 +60,34 @@ RemoteQueryExecutor::RemoteQueryExecutor( : header(header_), query(query_), context(context_) , scalars(scalars_), external_tables(external_tables_), stage(stage_) { - create_multiplexed_connections = [this, pool, throttler]() + create_connections = [this, pool, throttler]()->std::unique_ptr { const Settings & current_settings = context.getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); - std::vector connections; + +#if defined(OS_LINUX) + if (current_settings.use_hedged_requests) + { + std::shared_ptr table_to_check = nullptr; + if (main_table) + table_to_check = std::make_shared(main_table.getQualifiedName()); + + return std::make_unique(pool, current_settings, timeouts, throttler, pool_mode, table_to_check); + } +#endif + + std::vector connection_entries; if (main_table) { auto try_results = pool->getManyChecked(timeouts, ¤t_settings, pool_mode, main_table.getQualifiedName()); - connections.reserve(try_results.size()); + connection_entries.reserve(try_results.size()); for (auto & try_result : try_results) - connections.emplace_back(std::move(try_result.entry)); + connection_entries.emplace_back(std::move(try_result.entry)); } else - connections = pool->getMany(timeouts, ¤t_settings, pool_mode); + connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode); - return std::make_unique( - std::move(connections), current_settings, throttler); + return std::make_unique(std::move(connection_entries), current_settings, throttler); }; } @@ -85,7 +98,7 @@ RemoteQueryExecutor::~RemoteQueryExecutor() * these connections did not remain hanging in the out-of-sync state. 
*/ if (established || isQueryPending()) - multiplexed_connections->disconnect(); + connections->disconnect(); } /** If we receive a block with slightly different column types, or with excessive columns, @@ -142,10 +155,10 @@ void RemoteQueryExecutor::sendQuery() if (sent_query) return; - multiplexed_connections = create_multiplexed_connections(); + connections = create_connections(); const auto & settings = context.getSettingsRef(); - if (settings.skip_unavailable_shards && 0 == multiplexed_connections->size()) + if (settings.skip_unavailable_shards && 0 == connections->size()) return; /// Query cannot be canceled in the middle of the send query, @@ -173,10 +186,10 @@ void RemoteQueryExecutor::sendQuery() { std::lock_guard lock(duplicated_part_uuids_mutex); if (!duplicated_part_uuids.empty()) - multiplexed_connections->sendIgnoredPartUUIDs(duplicated_part_uuids); + connections->sendIgnoredPartUUIDs(duplicated_part_uuids); } - multiplexed_connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true); + connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true); established = false; sent_query = true; @@ -192,7 +205,7 @@ Block RemoteQueryExecutor::read() { sendQuery(); - if (context.getSettingsRef().skip_unavailable_shards && (0 == multiplexed_connections->size())) + if (context.getSettingsRef().skip_unavailable_shards && (0 == connections->size())) return {}; } @@ -201,7 +214,7 @@ Block RemoteQueryExecutor::read() if (was_cancelled) return Block(); - Packet packet = multiplexed_connections->receivePacket(); + Packet packet = connections->receivePacket(); if (auto block = processPacket(std::move(packet))) return *block; @@ -218,7 +231,7 @@ std::variant RemoteQueryExecutor::read(std::unique_ptr { sendQuery(); - if (context.getSettingsRef().skip_unavailable_shards && (0 == multiplexed_connections->size())) + if (context.getSettingsRef().skip_unavailable_shards && (0 == connections->size())) return Block(); } @@ -228,7 +241,7 @@ std::variant RemoteQueryExecutor::read(std::unique_ptr if (was_cancelled) return Block(); - read_context = std::make_unique(*multiplexed_connections); + read_context = std::make_unique(*connections); } do @@ -239,7 +252,7 @@ std::variant RemoteQueryExecutor::read(std::unique_ptr if (read_context->is_read_in_progress.load(std::memory_order_relaxed)) { read_context->setTimer(); - return read_context->epoll_fd; + return read_context->epoll.getFileDescriptor(); } else { @@ -260,7 +273,7 @@ std::variant RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs { /// Cancel previous query and disconnect before retry. 
cancel(read_context); - multiplexed_connections->disconnect(); + connections->disconnect(); /// Only resend once, otherwise throw an exception if (!resent_query) @@ -300,7 +313,7 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) break; case Protocol::Server::EndOfStream: - if (!multiplexed_connections->hasActiveConnections()) + if (!connections->hasActiveConnections()) { finished = true; return Block(); @@ -342,7 +355,7 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) got_unknown_packet_from_replica = true; throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", toString(packet.type), - multiplexed_connections->dumpAddresses()); + connections->dumpAddresses()); } return {}; @@ -382,7 +395,7 @@ void RemoteQueryExecutor::finish(std::unique_ptr * read_context) tryCancel("Cancelling query because enough data has been read", read_context); /// Get the remaining packets so that there is no out of sync in the connections to the replicas. - Packet packet = multiplexed_connections->drain(); + Packet packet = connections->drain(); switch (packet.type) { case Protocol::Server::EndOfStream: @@ -404,7 +417,7 @@ void RemoteQueryExecutor::finish(std::unique_ptr * read_context) got_unknown_packet_from_replica = true; throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", toString(packet.type), - multiplexed_connections->dumpAddresses()); + connections->dumpAddresses()); } } @@ -427,14 +440,14 @@ void RemoteQueryExecutor::cancel(std::unique_ptr * read_context) void RemoteQueryExecutor::sendScalars() { - multiplexed_connections->sendScalarsData(scalars); + connections->sendScalarsData(scalars); } void RemoteQueryExecutor::sendExternalTables() { SelectQueryInfo query_info; - size_t count = multiplexed_connections->size(); + size_t count = connections->size(); { std::lock_guard lock(external_tables_mutex); @@ -472,7 +485,7 @@ void RemoteQueryExecutor::sendExternalTables() } } - multiplexed_connections->sendExternalTablesData(external_tables_data); + connections->sendExternalTablesData(external_tables_data); } void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr * read_context) @@ -489,11 +502,11 @@ void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptrcancel(); - multiplexed_connections->sendCancel(); + connections->sendCancel(); } if (log) - LOG_TRACE(log, "({}) {}", multiplexed_connections->dumpAddresses(), reason); + LOG_TRACE(log, "({}) {}", connections->dumpAddresses(), reason); } bool RemoteQueryExecutor::isQueryPending() const diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index 6a10627b948..f55a023231b 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -1,7 +1,8 @@ #pragma once #include -#include +#include +#include #include #include #include @@ -40,7 +41,7 @@ public: /// Accepts several connections already taken from pool. 
RemoteQueryExecutor( - std::vector && connections, + std::vector && connections_, const String & query_, const Block & header_, const Context & context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); @@ -103,8 +104,8 @@ private: Block totals; Block extremes; - std::function()> create_multiplexed_connections; - std::unique_ptr multiplexed_connections; + std::function()> create_connections; + std::unique_ptr connections; const String query; String query_id = ""; diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/DataStreams/RemoteQueryExecutorReadContext.cpp index 3cc24ad5056..c2a65f02d08 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp +++ b/src/DataStreams/RemoteQueryExecutorReadContext.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace DB @@ -11,7 +11,7 @@ namespace DB struct RemoteQueryExecutorRoutine { - MultiplexedConnections & connections; + IConnections & connections; RemoteQueryExecutorReadContext & read_context; struct ReadCallback @@ -19,15 +19,15 @@ struct RemoteQueryExecutorRoutine RemoteQueryExecutorReadContext & read_context; Fiber & fiber; - void operator()(Poco::Net::Socket & socket) + void operator()(int fd, const Poco::Timespan & timeout = 0, const std::string fd_description = "") { try { - read_context.setSocket(socket); + read_context.setConnectionFD(fd, timeout, fd_description); } catch (DB::Exception & e) { - e.addMessage(" while reading from socket ({})", socket.peerAddress().toString()); + e.addMessage(" while reading from {}", fd_description); throw; } @@ -70,60 +70,38 @@ namespace ErrorCodes extern const int SOCKET_TIMEOUT; } -RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(MultiplexedConnections & connections_) +RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(IConnections & connections_) : connections(connections_) { - epoll_fd = epoll_create(2); - if (-1 == epoll_fd) - throwFromErrno("Cannot create epoll descriptor", ErrorCodes::CANNOT_OPEN_FILE); if (-1 == pipe2(pipe_fd, O_NONBLOCK)) throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE); { - epoll_event socket_event; - socket_event.events = EPOLLIN | EPOLLPRI; - socket_event.data.fd = pipe_fd[0]; - - if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fd[0], &socket_event)) - throwFromErrno("Cannot add pipe descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + epoll.add(pipe_fd[0]); } { - epoll_event timer_event; - timer_event.events = EPOLLIN | EPOLLPRI; - timer_event.data.fd = timer.getDescriptor(); - - if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_event.data.fd, &timer_event)) - throwFromErrno("Cannot add timer descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + epoll.add(timer.getDescriptor()); } auto routine = RemoteQueryExecutorRoutine{connections, *this}; fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine)); } -void RemoteQueryExecutorReadContext::setSocket(Poco::Net::Socket & socket) +void RemoteQueryExecutorReadContext::setConnectionFD(int fd, const Poco::Timespan & timeout, const std::string & fd_description) { - int fd = socket.impl()->sockfd(); - if (fd == socket_fd) + if (fd == connection_fd) return; - epoll_event socket_event; - socket_event.events = EPOLLIN | EPOLLPRI; - socket_event.data.fd = fd; + if (connection_fd != -1) + epoll.remove(connection_fd); - if (socket_fd != -1) - { - if (-1 == 
epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socket_fd, &socket_event)) - throwFromErrno("Cannot remove socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); - } + connection_fd = fd; + epoll.add(connection_fd); - socket_fd = fd; - - if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, socket_fd, &socket_event)) - throwFromErrno("Cannot add socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); - - receive_timeout = socket.impl()->getReceiveTimeout(); + receive_timeout = timeout; + connection_fd_description = fd_description; } bool RemoteQueryExecutorReadContext::checkTimeout() const @@ -142,17 +120,11 @@ bool RemoteQueryExecutorReadContext::checkTimeout() const bool RemoteQueryExecutorReadContext::checkTimeoutImpl() const { + /// Wait for epoll will not block if it was polled externally. epoll_event events[3]; events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; - /// Wait for epoll_fd will not block if it was polled externally. - int num_events = 0; - while (num_events <= 0) - { - num_events = epoll_wait(epoll_fd, events, 3, -1); - if (num_events == -1 && errno != EINTR) - throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET); - } + int num_events = epoll.getManyReady(3, events,/* blocking = */ false); bool is_socket_ready = false; bool is_pipe_alarmed = false; @@ -160,7 +132,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl() const for (int i = 0; i < num_events; ++i) { - if (events[i].data.fd == socket_fd) + if (events[i].data.fd == connection_fd) is_socket_ready = true; if (events[i].data.fd == timer.getDescriptor()) has_timer_alarm = true; @@ -229,9 +201,7 @@ void RemoteQueryExecutorReadContext::cancel() RemoteQueryExecutorReadContext::~RemoteQueryExecutorReadContext() { - /// socket_fd is closed by Poco::Net::Socket - if (epoll_fd != -1) - close(epoll_fd); + /// connection_fd is closed by Poco::Net::Socket or Epoll if (pipe_fd[0] != -1) close(pipe_fd[0]); if (pipe_fd[1] != -1) diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/DataStreams/RemoteQueryExecutorReadContext.h index 89dc2813a9a..cb6421f78d0 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.h +++ b/src/DataStreams/RemoteQueryExecutorReadContext.h @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include #include namespace Poco::Net @@ -33,26 +35,29 @@ public: std::mutex fiber_lock; Poco::Timespan receive_timeout; - MultiplexedConnections & connections; + IConnections & connections; Poco::Net::Socket * last_used_socket = nullptr; /// Here we have three descriptors we are going to wait: - /// * socket_fd is a descriptor of connection. It may be changed in case of reading from several replicas. + /// * connection_fd is a descriptor of connection. It may be changed in case of reading from several replicas. /// * timer is a timerfd descriptor to manually check socket timeout /// * pipe_fd is a pipe we use to cancel query and socket polling by executor. - /// We put those descriptors into our own epoll_fd which is used by external executor. + /// We put those descriptors into our own epoll which is used by external executor. 
TimerDescriptor timer{CLOCK_MONOTONIC, 0}; - int socket_fd = -1; - int epoll_fd = -1; + int connection_fd = -1; int pipe_fd[2] = { -1, -1 }; - explicit RemoteQueryExecutorReadContext(MultiplexedConnections & connections_); + Epoll epoll; + + std::string connection_fd_description; + + explicit RemoteQueryExecutorReadContext(IConnections & connections_); ~RemoteQueryExecutorReadContext(); bool checkTimeout() const; bool checkTimeoutImpl() const; - void setSocket(Poco::Net::Socket & socket); + void setConnectionFD(int fd, const Poco::Timespan & timeout = 0, const std::string & fd_description = ""); void setTimer() const; bool resumeRoutine(); diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index dc3ce039dbd..eee3f22f9f1 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -84,16 +84,7 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr return get("LowCardinality", low_cardinality_params); } - DataTypePtr res = findCreatorByName(family_name)(parameters); - - if (CurrentThread::isInitialized()) - { - const auto * query_context = CurrentThread::get().getQueryContext(); - if (query_context && query_context->getSettingsRef().log_queries) - query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name); - } - - return res; + return findCreatorByName(family_name)(parameters); } DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const @@ -165,10 +156,18 @@ void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCre const DataTypeFactory::Value & DataTypeFactory::findCreatorByName(const String & family_name) const { + const Context * query_context = nullptr; + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + { DataTypesDictionary::const_iterator it = data_types.find(family_name); if (data_types.end() != it) + { + if (query_context && query_context->getSettingsRef().log_queries) + query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name); return it->second; + } } String family_name_lowercase = Poco::toLower(family_name); @@ -176,7 +175,11 @@ const DataTypeFactory::Value & DataTypeFactory::findCreatorByName(const String & { DataTypesDictionary::const_iterator it = case_insensitive_data_types.find(family_name_lowercase); if (case_insensitive_data_types.end() != it) + { + if (query_context && query_context->getSettingsRef().log_queries) + query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name_lowercase); return it->second; + } } auto hints = this->getHints(family_name); diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 21cfe855169..87e989d1dd2 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNEXPECTED_AST_STRUCTURE; + extern const int TOO_LARGE_STRING_SIZE; } @@ -120,13 +121,21 @@ void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t ro } -static inline void alignStringLength(const DataTypeFixedString & type, - ColumnFixedString::Chars & data, - size_t string_start) +void DataTypeFixedString::alignStringLength(PaddedPODArray & chars, size_t old_size) const { - ColumnFixedString::alignStringLength(data, type.getN(), string_start); + size_t length = chars.size() - 
old_size; + if (length < n) + { + chars.resize_fill(old_size + n); + } + else if (length > n) + { + chars.resize_assume_reserved(old_size); + throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE); + } } + template static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader) { @@ -135,7 +144,7 @@ static inline void read(const DataTypeFixedString & self, IColumn & column, Read try { reader(data); - alignStringLength(self, data, prev_size); + self.alignStringLength(data, prev_size); } catch (...) { diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index af82e4b5d11..5c80a0e346a 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -1,6 +1,7 @@ #pragma once #include +#include #define MAX_FIXEDSTRING_SIZE 0xFFFFFF @@ -82,6 +83,11 @@ public: bool isCategorial() const override { return true; } bool canBeInsideNullable() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } + + /// Makes sure that the length of a newly inserted string to `chars` is equal to getN(). + /// If the length is less than getN() the function will add zero characters up to getN(). + /// If the length is greater than getN() the function will throw an exception. + void alignStringLength(PaddedPODArray & chars, size_t old_size) const; }; } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 2261e776ea2..5d2050c09e9 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -138,7 +138,7 @@ void DataTypeTuple::serializeBinary(const IColumn & column, size_t row_num, Writ idx_elem.second->serializeBinary(extractElementColumn(column, idx_elem.first), row_num, ostr); } - +/// Function must atomically insert values into tuple column template static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) { @@ -151,7 +151,8 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) // Check that all columns now have the same size. 
size_t new_size = column.size(); - for (auto i : ext::range(1, ext::size(elems))) + + for (auto i : ext::range(0, ext::size(elems))) { const auto & element_column = extractElementColumn(column, i); if (element_column.size() != new_size) @@ -168,6 +169,7 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) for (const auto & i : ext::range(0, ext::size(elems))) { auto & element_column = extractElementColumn(column, i); + if (element_column.size() > old_size) element_column.popBack(1); } @@ -215,17 +217,18 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F } elems[i]->deserializeAsTextQuoted(extractElementColumn(column, i), istr, settings); } - }); - // Special format for one element tuple (1,) - if (1 == elems.size()) - { + // Special format for one element tuple (1,) + if (1 == elems.size()) + { + skipWhitespaceIfAny(istr); + // Allow both (1) and (1,) + checkChar(',', istr); + } + skipWhitespaceIfAny(istr); - // Allow both (1) and (1,) - checkChar(',', istr); - } - skipWhitespaceIfAny(istr); - assertChar(')', istr); + assertChar(')', istr); + }); } void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -290,10 +293,10 @@ void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, con auto & element_column = extractElementColumn(column, element_pos); elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings); } - }); - skipWhitespaceIfAny(istr); - assertChar('}', istr); + skipWhitespaceIfAny(istr); + assertChar('}', istr); + }); } else { @@ -312,10 +315,10 @@ void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, con } elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings); } - }); - skipWhitespaceIfAny(istr); - assertChar(']', istr); + skipWhitespaceIfAny(istr); + assertChar(']', istr); + }); } } diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index e0c5717711c..ee5a3b5eed0 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -22,7 +22,7 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db /// We also need similar graph to load tables on server startup in order of topsort. } -void DatabaseReplicatedDDLWorker::initializeMainThread() +bool DatabaseReplicatedDDLWorker::initializeMainThread() { while (!stop_flag) { @@ -33,7 +33,7 @@ void DatabaseReplicatedDDLWorker::initializeMainThread() database->tryConnectToZooKeeperAndInitDatabase(false); initializeReplication(); initialized = true; - return; + return true; } catch (...) 
{ @@ -41,6 +41,8 @@ void DatabaseReplicatedDDLWorker::initializeMainThread() sleepForSeconds(5); } } + + return false; } void DatabaseReplicatedDDLWorker::shutdown() @@ -61,7 +63,7 @@ void DatabaseReplicatedDDLWorker::initializeReplication() if (our_log_ptr == 0 || our_log_ptr + logs_to_keep < max_log_ptr) database->recoverLostReplica(current_zookeeper, our_log_ptr, max_log_ptr); else - last_skipped_entry_name.emplace(log_ptr_str); + last_skipped_entry_name.emplace(DDLTaskBase::getLogEntryName(our_log_ptr)); } String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry) diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 6ba46a98bca..3a45817c755 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -30,7 +30,7 @@ public: void shutdown() override; private: - void initializeMainThread() override; + bool initializeMainThread() override; void initializeReplication(); DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override; diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index a6e5ded3efd..b6128f1bb18 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -54,19 +54,32 @@ static DataTypePtr convertPostgreSQLDataType(std::string & type, bool is_nullabl res = std::make_shared(); else if (type.starts_with("numeric")) { - /// Numeric and decimal will both end up here as numeric. - res = DataTypeFactory::instance().get(type); - uint32_t precision = getDecimalPrecision(*res); - uint32_t scale = getDecimalScale(*res); + /// Numeric and decimal will both end up here as numeric. 
If it has type and precision, + /// there will be Numeric(x, y), otherwise just Numeric + UInt32 precision, scale; + if (type.ends_with(")")) + { + res = DataTypeFactory::instance().get(type); + precision = getDecimalPrecision(*res); + scale = getDecimalScale(*res); - if (precision <= DecimalUtils::max_precision) - res = std::make_shared>(precision, scale); - else if (precision <= DecimalUtils::max_precision) - res = std::make_shared>(precision, scale); - else if (precision <= DecimalUtils::max_precision) + if (precision <= DecimalUtils::max_precision) + res = std::make_shared>(precision, scale); + else if (precision <= DecimalUtils::max_precision) + res = std::make_shared>(precision, scale); + else if (precision <= DecimalUtils::max_precision) + res = std::make_shared>(precision, scale); + else if (precision <= DecimalUtils::max_precision) + res = std::make_shared>(precision, scale); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Precision {} and scale {} are too big and not supported", precision, scale); + } + else + { + precision = DecimalUtils::max_precision; + scale = precision / 2; res = std::make_shared>(precision, scale); - else if (precision <= DecimalUtils::max_precision) - res = std::make_shared>(precision, scale); + } } if (!res) diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index a21b1bd50fc..572080aee1e 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -47,11 +47,13 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include "readInvalidateQuery.h" +# include # include namespace DB { static const UInt64 max_block_size = 8192; +static const size_t default_num_tries_on_connection_loss = 3; MySQLDictionarySource::MySQLDictionarySource( @@ -72,7 +74,10 @@ MySQLDictionarySource::MySQLDictionarySource( , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} - , close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} + , close_connection( + config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)) + , max_tries_for_mysql_block_input_stream( + config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 
1 : default_num_tries_on_connection_loss) { } @@ -94,6 +99,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} , close_connection{other.close_connection} + , max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream} { } @@ -114,13 +120,41 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate() } } +BlockInputStreamPtr MySQLDictionarySource::retriedCreateMySqlBIStream(const std::string & data_fetch_query_str, const size_t max_tries) +{ + size_t count_connection_lost = 0; + + while (true) + { + auto connection = pool.get(); + + try + { + return std::make_shared( + connection, data_fetch_query_str, sample_block, max_block_size, close_connection); + } + catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST + { + if (++count_connection_lost < max_tries) + { + LOG_WARNING(log, ecl.displayText()); + LOG_WARNING(log, "Lost connection ({}/{}). Trying to reconnect...", count_connection_lost, max_tries); + continue; + } + + LOG_ERROR(log, "Failed ({}/{}) to create BlockInputStream for MySQL dictionary source.", count_connection_lost, max_tries); + throw; + } + } +} + BlockInputStreamPtr MySQLDictionarySource::loadAll() { auto connection = pool.get(); last_modification = getLastModification(connection, false); LOG_TRACE(log, load_all_query); - return std::make_shared(connection, load_all_query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(load_all_query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() @@ -130,7 +164,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() std::string load_update_query = getUpdateFieldAndDate(); LOG_TRACE(log, load_update_query); - return std::make_shared(connection, load_update_query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(load_update_query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector & ids) @@ -138,7 +172,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector & i /// We do not log in here and do not update the modification time, as the request can be large, and often called. const auto query = query_builder.composeLoadIdsQuery(ids); - return std::make_shared(pool.get(), query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream); } BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -146,7 +180,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, /// We do not log in here and do not update the modification time, as the request can be large, and often called. 
const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN); - return std::make_shared(pool.get(), query, sample_block, max_block_size, close_connection); + return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream); } bool MySQLDictionarySource::isModified() const diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index 34f784cdfeb..54bc2da6a56 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -69,6 +69,9 @@ private: // execute invalidate_query. expects single cell in result std::string doInvalidateQuery(const std::string & request) const; + /// A helper method for recovering from "Lost connection to MySQL server during query" errors + BlockInputStreamPtr retriedCreateMySqlBIStream(const std::string & query_str, const size_t max_tries); + Poco::Logger * log; std::chrono::time_point update_time; @@ -86,6 +89,7 @@ private: std::string invalidate_query; mutable std::string invalidate_query_response; const bool close_connection; + const size_t max_tries_for_mysql_block_input_stream; }; } diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 82149460773..2ab8e11a854 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -525,16 +525,16 @@ namespace { public: using ColumnType = std::conditional_t; - using StringDataType = std::conditional_t; ProtobufSerializerString( - const StringDataType & string_data_type_, + const std::shared_ptr & fixed_string_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + , fixed_string_data_type(fixed_string_data_type_) + , n(fixed_string_data_type->getN()) { static_assert(is_fixed_string, "This constructor for FixedString only"); - n = string_data_type_.getN(); setFunctions(); prepareEnumMapping(); } @@ -583,11 +583,11 @@ namespace { if (row_num < old_size) { - ColumnFixedString::alignStringLength(text_buffer, n, 0); + fixed_string_data_type->alignStringLength(text_buffer, 0); memcpy(data.data() + row_num * n, text_buffer.data(), n); } else - ColumnFixedString::alignStringLength(data, n, old_data_size); + fixed_string_data_type->alignStringLength(data, old_data_size); } else { @@ -817,7 +817,7 @@ namespace auto str = default_function(); arr.insert(str.data(), str.data() + str.size()); if constexpr (is_fixed_string) - ColumnFixedString::alignStringLength(arr, n, 0); + fixed_string_data_type->alignStringLength(arr, 0); default_string = std::move(arr); } return *default_string; @@ -865,7 +865,8 @@ namespace str.insert(name.data(), name.data() + name.length()); } - size_t n = 0; + const std::shared_ptr fixed_string_data_type; + const size_t n = 0; std::function write_function; std::function &)> read_function; std::function default_function; @@ -2765,7 +2766,7 @@ namespace case TypeIndex::DateTime: return std::make_unique(field_descriptor, reader_or_writer); case TypeIndex::DateTime64: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); case TypeIndex::String: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::FixedString: return std::make_unique>(assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::FixedString: return std::make_unique>(typeid_cast>(data_type), field_descriptor, 
reader_or_writer); case TypeIndex::Enum8: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); case TypeIndex::Enum16: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); case TypeIndex::Decimal32: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); @@ -2810,12 +2811,7 @@ namespace const auto & array_data_type = assert_cast(*data_type); if (!allow_repeat) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) - + " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}), - ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED); - } + throwFieldNotRepeated(field_descriptor, column_name); auto nested_serializer = buildFieldSerializer(column_name, array_data_type.getNestedType(), field_descriptor, /* allow_repeat = */ false); // We do our repeating now, so for nested type we forget about the repeating. @@ -2860,12 +2856,7 @@ namespace /// Serialize as a repeated field. if (!allow_repeat && (size_of_tuple > 1)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) - + " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}), - ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED); - } + throwFieldNotRepeated(field_descriptor, column_name); std::vector> nested_serializers; for (const auto & nested_data_type : tuple_data_type.getElements()) @@ -2891,6 +2882,21 @@ namespace } } + [[noreturn]] static void throwFieldNotRepeated(const FieldDescriptor & field_descriptor, const std::string_view & column_name) + { + if (!field_descriptor.is_repeated()) + throw Exception( + "The field " + quoteString(field_descriptor.full_name()) + + " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}), + ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED); + + throw Exception( + "The field " + quoteString(field_descriptor.full_name()) + + " is repeated but the level of repeatedness is not enough to serialize a multidimensional array from the column " + + backQuote(StringRef{column_name}) + ". 
It's recommended to make the parent field repeated as well.", + ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED); + } + const ProtobufReaderOrWriter reader_or_writer; }; } diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 09fd360a925..e13f310de09 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -99,7 +99,8 @@ FunctionOverloadResolverImplPtr FunctionFactory::tryGetImpl( res = it->second(context); else { - it = case_insensitive_functions.find(Poco::toLower(name)); + name = Poco::toLower(name); + it = case_insensitive_functions.find(name); if (case_insensitive_functions.end() != it) res = it->second(context); } diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 6b15bf821b2..49f155c687b 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -25,16 +25,18 @@ ColumnPtr ExecutableFunctionJoinGet::execute(const ColumnsWithTypeAndNa auto key = arguments[i]; keys.emplace_back(std::move(key)); } - return join->joinGet(keys, result_columns).column; + return storage_join->joinGet(keys, result_columns).column; } template ExecutableFunctionImplPtr FunctionJoinGet::prepare(const ColumnsWithTypeAndName &) const { - return std::make_unique>(join, DB::Block{{return_type->createColumn(), return_type, attr_name}}); + Block result_columns {{return_type->createColumn(), return_type, attr_name}}; + return std::make_unique>(table_lock, storage_join, result_columns); } -static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context) +static std::pair, String> +getJoin(const ColumnsWithTypeAndName & arguments, const Context & context) { String join_name; if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) @@ -87,13 +89,12 @@ FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTyp + ", should be greater or equal to 3", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto [storage_join, attr_name] = getJoin(arguments, context); - auto join = storage_join->getJoin(); DataTypes data_types(arguments.size() - 2); for (size_t i = 2; i < arguments.size(); ++i) data_types[i - 2] = arguments[i].type; - auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); + auto return_type = storage_join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); - return std::make_unique>(table_lock, storage_join, join, attr_name, data_types, return_type); + return std::make_unique>(table_lock, storage_join, attr_name, data_types, return_type); } void registerFunctionJoinGet(FunctionFactory & factory) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 27f348e9698..f88b58e961a 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -9,14 +9,20 @@ namespace DB class Context; class HashJoin; -using HashJoinPtr = std::shared_ptr; +class StorageJoin; +using StorageJoinPtr = std::shared_ptr; template class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl { public: - ExecutableFunctionJoinGet(HashJoinPtr join_, const DB::Block & result_columns_) - : join(std::move(join_)), result_columns(result_columns_) {} + ExecutableFunctionJoinGet(TableLockHolder table_lock_, + StorageJoinPtr storage_join_, + const DB::Block & result_columns_) + : table_lock(std::move(table_lock_)) + , 
storage_join(std::move(storage_join_)) + , result_columns(result_columns_) + {} static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; @@ -29,7 +35,8 @@ public: String getName() const override { return name; } private: - HashJoinPtr join; + TableLockHolder table_lock; + StorageJoinPtr storage_join; DB::Block result_columns; }; @@ -39,12 +46,11 @@ class FunctionJoinGet final : public IFunctionBaseImpl public: static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; - FunctionJoinGet(TableLockHolder table_lock_, StoragePtr storage_join_, - HashJoinPtr join_, String attr_name_, + FunctionJoinGet(TableLockHolder table_lock_, + StorageJoinPtr storage_join_, String attr_name_, DataTypes argument_types_, DataTypePtr return_type_) : table_lock(std::move(table_lock_)) - , storage_join(std::move(storage_join_)) - , join(std::move(join_)) + , storage_join(storage_join_) , attr_name(std::move(attr_name_)) , argument_types(std::move(argument_types_)) , return_type(std::move(return_type_)) @@ -60,8 +66,7 @@ public: private: TableLockHolder table_lock; - StoragePtr storage_join; - HashJoinPtr join; + StorageJoinPtr storage_join; const String attr_name; DataTypes argument_types; DataTypePtr return_type; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 2e2a4ce9cfa..4889132eeb2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2586,7 +2586,7 @@ private: WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const { return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr { const auto * col = arguments.front().column.get(); const auto & column_tuple = assert_cast(*col); @@ -2597,7 +2597,7 @@ private: { const auto & column_array = assert_cast(column_tuple.getColumn(i)); ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, input_rows_count); + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); offsets[i] = column_array.getOffsetsPtr(); } diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index d9531cb343d..3e80dd5b337 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -164,7 +164,10 @@ struct ReplaceRegexpImpl size_t size = offsets.size(); res_offsets.resize(size); - re2_st::RE2 searcher(needle); + typename re2_st::RE2::Options regexp_options; + /// Never write error messages to stderr. It's ignorant to do it from library code. + regexp_options.set_log_errors(false); + re2_st::RE2 searcher(needle, regexp_options); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); Instructions instructions = createInstructions(replacement, num_captures); @@ -193,7 +196,10 @@ struct ReplaceRegexpImpl res_data.reserve(data.size()); res_offsets.resize(size); - re2_st::RE2 searcher(needle); + typename re2_st::RE2::Options regexp_options; + /// Never write error messages to stderr. It's ignorant to do it from library code. 
+ regexp_options.set_log_errors(false); + re2_st::RE2 searcher(needle, regexp_options); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); Instructions instructions = createInstructions(replacement, num_captures); diff --git a/src/Functions/connectionID.cpp b/src/Functions/connectionID.cpp new file mode 100644 index 00000000000..8e9c81aed6c --- /dev/null +++ b/src/Functions/connectionID.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include + + +namespace DB +{ + +/// Get the connection ID. It's used for MySQL handler only. +class FunctionConnectionID : public IFunction +{ +public: + static constexpr auto name = "connectionID"; + + explicit FunctionConnectionID(const Context & context_) : context(context_) {} + + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, context.getClientInfo().connection_id); + } + +private: + const Context & context; +}; + +void registerFunctionConnectionID(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerAlias("connection_id", "connectionID"); +} + +} diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp new file mode 100644 index 00000000000..528bd0c311f --- /dev/null +++ b/src/Functions/extractTextFromHTML.cpp @@ -0,0 +1,358 @@ +#include +#include +#include +#include +#include +#include + + +/** A function to extract text from HTML or XHTML. + * It does not necessarily 100% conforms to any of the HTML, XML or XHTML standards, + * but the implementation is reasonably accurate and it is fast. + * + * The rules are the following: + * + * 1. Comments are skipped. Example: + * Comment must end with -->. Nested comments are not possible. + * Note: constructions like are not valid comments in HTML but will be skipped by other rules. + * + * 2. CDATA is pasted verbatim. + * Note: CDATA is XML/XHTML specific. But we still process it for "best-effort" approach. + * + * 3. 'script' and 'style' elements are removed with all their content. + * Note: it's assumed that closing tag cannot appear inside content. + * For example, in JS string literal is has to be escaped as "<\/script>". + * Note: comments and CDATA is possible inside script or style - then closing tags are not searched inside CDATA. + * Example: ]]> + * But still searched inside comments. Sometimes it becomes complicated: + * var y = "-->"; alert(x + y); + * Note: script and style can be the names of XML namespaces - then they are not treat like usual script or style. + * Example: Hello. + * Note: whitespaces are possible after closing tag name: but not before: < / script>. + * + * 4. Other tags or tag-like elements are skipped without inner content. + * Example: . + * Note: it's expected that this HTML is illegal: + * Note: it will also skip something like tags: <>, , etc. + * Note: tag without end will be skipped to the end of input: + * 5. HTML and XML entities are not decoded. + * It should be processed by separate function. + * + * 6. Whitespaces in text are collapsed or inserted by specific rules. 
+  *    Whitespaces at the beginning and at the end are removed.
+  *    Consecutive whitespaces are collapsed.
+  *    But if text is separated by other elements and there is no whitespace, it is inserted.
+  *    It may be unnatural, examples: Hello<b>world</b>, Hello<!-- -->world
+  *    - in HTML there will be no whitespace, but the function will insert it.
+  *    But also consider: Hello<p>world</p>, Hello<br>world.
+  *    This behaviour is reasonable for data analysis, e.g. converting HTML to a bag of words.
+  *
+  * 7. Also note that correct handling of whitespaces would require
+  *    support of <pre></pre> and CSS display and white-space properties.
+  *
+  * Usage example:
+  *
+  * SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String')
+  *
+  * - ClickHouse has an embedded web browser.
+  */
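A minimal sketch of what the rules above imply when the function is called on literal strings; the expected results in the comments are inferred from rules 1, 3 and 6 and are not taken from this patch's tests:

    SELECT extractTextFromHTML('<!-- note -->Hello');          -- 'Hello'        (rule 1: the comment is skipped)
    SELECT extractTextFromHTML('<script>alert(1)</script>Hi'); -- 'Hi'           (rule 3: script is removed together with its content)
    SELECT extractTextFromHTML('  Hello<b>world</b>  ');       -- 'Hello world'  (rule 6: trimmed, a space is inserted between the two texts)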
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
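+/// Low-level helpers for the parser below: check whether the bytes at `s` begin with `prefix`
+/// without reading past `end`, and (in checkAndSkip) advance `s` past the prefix on a match.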
+inline bool startsWith(const char * s, const char * end, const char * prefix)
+{
+    return s + strlen(prefix) < end && 0 == memcmp(s, prefix, strlen(prefix));
+}
+
+inline bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix)
+{
+    if (startsWith(s, end, prefix))
+    {
+        s += strlen(prefix);
+        return true;
+    }
+    return false;
+}
+
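+/// Skips an HTML comment if the cursor is positioned at the "<!--" opener; returns false when there is no comment to skip.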
+bool processComment(const char * __restrict & src, const char * end)
+{
+    if (!checkAndSkip(src, end, "<!--"))
+        return false;
diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index c3c86f5a9c5..ffb6dcb0588 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -103,21 +103,32 @@ def test_join(): def test_cannot_trick_row_policy_with_keyword_with(): - assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1") == TSV([[1, 0], [1, 1]]) - assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1") == TSV([[0, 0], [0, 1]]) assert node.query("WITH 0 AS a SELECT a FROM mydb.filtered_table1") == TSV([[0], [0]]) assert node.query("WITH 0 AS a SELECT b FROM mydb.filtered_table1") == TSV([[0], [1]]) + assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1") == TSV([[1, 0], [1, 1]]) + assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1 WHERE a >= 0 AND b >= 0 SETTINGS optimize_move_to_prewhere = 0") == TSV([[1, 0], [1, 1]]) + assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1 PREWHERE a >= 0 AND b >= 0") == TSV([[1, 0], [1, 1]]) + assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1 PREWHERE a >= 0 WHERE b >= 0") == TSV([[1, 0], [1, 1]]) + assert node.query("WITH 0 AS a SELECT * FROM mydb.filtered_table1 PREWHERE b >= 0 WHERE a >= 0") == TSV([[1, 0], [1, 1]]) -def test_prewhere_not_supported(): - expected_error = "PREWHERE is not supported if the table is filtered by row-level security" - assert expected_error in node.query_and_get_error("SELECT * FROM mydb.filtered_table1 PREWHERE 1") - assert expected_error in node.query_and_get_error("SELECT * FROM mydb.filtered_table2 PREWHERE 1") - assert expected_error in node.query_and_get_error("SELECT * FROM mydb.filtered_table3 PREWHERE 1") + assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1") == TSV([[0, 0], [0, 1]]) + assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1 WHERE a >= 0 AND b >= 0 SETTINGS optimize_move_to_prewhere = 0") == TSV([[0, 0], [0, 1]]) + assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1 PREWHERE a >= 0 AND b >= 0") == TSV([[0, 0], [0, 1]]) + assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1 PREWHERE a >= 0 WHERE b >= 0") == TSV([[0, 0], [0, 1]]) + assert node.query("WITH 0 AS a SELECT a, b FROM mydb.filtered_table1 PREWHERE b >= 0 WHERE a >= 0") == TSV([[0, 0], [0, 1]]) - # However PREWHERE should still work for user without filtering. 
- assert node.query("SELECT * FROM mydb.filtered_table1 PREWHERE 1", user="another") == TSV( - [[0, 0], [0, 1], [1, 0], [1, 1]]) + assert node.query("WITH 0 AS c SELECT * FROM mydb.filtered_table3") == TSV([[0, 1], [1, 0]]) + assert node.query("WITH 0 AS c SELECT * FROM mydb.filtered_table3 WHERE c >= 0 AND a >= 0 SETTINGS optimize_move_to_prewhere = 0") == TSV([[0, 1], [1, 0]]) + assert node.query("WITH 0 AS c SELECT * FROM mydb.filtered_table3 PREWHERE c >= 0 AND a >= 0") == TSV([[0, 1], [1, 0]]) + assert node.query("WITH 0 AS c SELECT * FROM mydb.filtered_table3 PREWHERE c >= 0 WHERE a >= 0") == TSV([[0, 1], [1, 0]]) + assert node.query("WITH 0 AS c SELECT * FROM mydb.filtered_table3 PREWHERE a >= 0 WHERE c >= 0") == TSV([[0, 1], [1, 0]]) + + assert node.query("WITH 0 AS c SELECT a, b, c FROM mydb.filtered_table3") == TSV([[0, 1, 0], [1, 0, 0]]) + assert node.query("WITH 0 AS c SELECT a, b, c FROM mydb.filtered_table3 WHERE c >= 0 AND a >= 0 SETTINGS optimize_move_to_prewhere = 0") == TSV([[0, 1, 0], [1, 0, 0]]) + assert node.query("WITH 0 AS c SELECT a, b, c FROM mydb.filtered_table3 PREWHERE c >= 0 AND a >= 0") == TSV([[0, 1, 0], [1, 0, 0]]) + assert node.query("WITH 0 AS c SELECT a, b, c FROM mydb.filtered_table3 PREWHERE c >= 0 WHERE a >= 0") == TSV([[0, 1, 0], [1, 0, 0]]) + assert node.query("WITH 0 AS c SELECT a, b, c FROM mydb.filtered_table3 PREWHERE a >= 0 WHERE c >= 0") == TSV([[0, 1, 0], [1, 0, 0]]) def test_policy_from_users_xml_affects_only_user_assigned(): @@ -132,6 +143,57 @@ def test_policy_from_users_xml_affects_only_user_assigned(): assert node.query("SELECT * FROM mydb.local", user="another") == TSV([[1, 0], [1, 1]]) +def test_with_prewhere(): + copy_policy_xml('normal_filter2_table2.xml') + assert node.query("SELECT * FROM mydb.filtered_table2 WHERE a > 1 SETTINGS optimize_move_to_prewhere = 0") == TSV([[4, 3, 2, 1]]) + assert node.query("SELECT a FROM mydb.filtered_table2 WHERE a > 1 SETTINGS optimize_move_to_prewhere = 0") == TSV([[4]]) + assert node.query("SELECT a, b FROM mydb.filtered_table2 WHERE a > 1 SETTINGS optimize_move_to_prewhere = 0") == TSV([[4, 3]]) + assert node.query("SELECT b, c FROM mydb.filtered_table2 WHERE a > 1 SETTINGS optimize_move_to_prewhere = 0") == TSV([[3, 2]]) + assert node.query("SELECT d FROM mydb.filtered_table2 WHERE a > 1 SETTINGS optimize_move_to_prewhere = 0") == TSV([[1]]) + + assert node.query("SELECT * FROM mydb.filtered_table2 PREWHERE a > 1") == TSV([[4, 3, 2, 1]]) + assert node.query("SELECT a FROM mydb.filtered_table2 PREWHERE a > 1") == TSV([[4]]) + assert node.query("SELECT a, b FROM mydb.filtered_table2 PREWHERE a > 1") == TSV([[4, 3]]) + assert node.query("SELECT b, c FROM mydb.filtered_table2 PREWHERE a > 1") == TSV([[3, 2]]) + assert node.query("SELECT d FROM mydb.filtered_table2 PREWHERE a > 1") == TSV([[1]]) + + assert node.query("SELECT * FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[1, 2, 3, 4]]) + assert node.query("SELECT a FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[1]]) + assert node.query("SELECT b FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[2]]) + assert node.query("SELECT a, b FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[1, 2]]) + assert node.query("SELECT a, c FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[1, 3]]) + assert node.query("SELECT b, d FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") == TSV([[2, 4]]) + assert node.query("SELECT c, d FROM mydb.filtered_table2 PREWHERE a < 4 WHERE b < 10") 
== TSV([[3, 4]]) + + +def test_throwif_error_in_where_with_same_condition_as_filter(): + copy_policy_xml('normal_filter2_table2.xml') + assert 'expected' in node.query_and_get_error("SELECT * FROM mydb.filtered_table2 WHERE throwIf(a > 0, 'expected') = 0 SETTINGS optimize_move_to_prewhere = 0") + + +def test_throwif_error_in_prewhere_with_same_condition_as_filter(): + copy_policy_xml('normal_filter2_table2.xml') + assert 'expected' in node.query_and_get_error("SELECT * FROM mydb.filtered_table2 PREWHERE throwIf(a > 0, 'expected') = 0") + + +def test_throwif_in_where_doesnt_expose_restricted_data(): + copy_policy_xml('no_filters.xml') + assert 'expected' in node.query_and_get_error("SELECT * FROM mydb.filtered_table2 WHERE throwIf(a = 0, 'expected') = 0 SETTINGS optimize_move_to_prewhere = 0") + + copy_policy_xml('normal_filter2_table2.xml') + assert node.query("SELECT * FROM mydb.filtered_table2 WHERE throwIf(a = 0, 'pwned') = 0 SETTINGS optimize_move_to_prewhere = 0") == TSV([ + [1, 2, 3, 4], [4, 3, 2, 1]]) + + +def test_throwif_in_prewhere_doesnt_expose_restricted_data(): + copy_policy_xml('no_filters.xml') + assert 'expected' in node.query_and_get_error("SELECT * FROM mydb.filtered_table2 PREWHERE throwIf(a = 0, 'expected') = 0") + + copy_policy_xml('normal_filter2_table2.xml') + assert node.query("SELECT * FROM mydb.filtered_table2 PREWHERE throwIf(a = 0, 'pwned') = 0") == TSV([ + [1, 2, 3, 4], [4, 3, 2, 1]]) + + def test_change_of_users_xml_changes_row_policies(): copy_policy_xml('normal_filters.xml') assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[1, 0], [1, 1]]) @@ -154,6 +216,11 @@ def test_change_of_users_xml_changes_row_policies(): assert node.query("SELECT * FROM mydb.filtered_table2") == TSV([[0, 0, 0, 0], [0, 0, 6, 0]]) assert node.query("SELECT * FROM mydb.filtered_table3") == TSV([[0, 1], [1, 0]]) + copy_policy_xml('normal_filter2_table2.xml') + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[0, 0], [0, 1], [1, 0], [1, 1]]) + assert node.query("SELECT * FROM mydb.filtered_table2") == TSV([[1, 2, 3, 4], [4, 3, 2, 1]]) + assert node.query("SELECT * FROM mydb.filtered_table3") == TSV([[0, 0], [0, 1], [1, 0], [1, 1]]) + copy_policy_xml('no_filters.xml') assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[0, 0], [0, 1], [1, 0], [1, 1]]) assert node.query("SELECT * FROM mydb.filtered_table2") == TSV( diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 5f2726832cc..2a73375c5ea 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -18,10 +18,8 @@ from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager from helpers.test_tools import TSV -from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnection +from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer from kafka.admin import NewTopic -from kafka.protocol.admin import DescribeGroupsRequest_v1 -from kafka.protocol.group import MemberAssignment """ protoc --version @@ -83,12 +81,16 @@ def wait_kafka_is_available(max_retries=50): def producer_serializer(x): return x.encode() if isinstance(x, str) else x -def kafka_produce(topic, messages, timestamp=None): - producer = KafkaProducer(bootstrap_servers="localhost:9092", value_serializer=producer_serializer) +def kafka_produce(topic, messages, timestamp=None, retries=2): + producer = 
KafkaProducer(bootstrap_servers="localhost:9092", value_serializer=producer_serializer, retries=retries, max_in_flight_requests_per_connection=1) for message in messages: producer.send(topic=topic, value=message, timestamp_ms=timestamp) producer.flush() +## just to ensure the python client / producer is working properly +def kafka_producer_send_heartbeat_msg(max_retries=50): + kafka_produce('test_heartbeat_topic', ['test'], retries=max_retries) + def kafka_consume(topic): consumer = KafkaConsumer(bootstrap_servers="localhost:9092", auto_offset_reset="earliest") consumer.subscribe(topics=(topic)) @@ -156,6 +158,132 @@ def avro_confluent_message(schema_registry_client, value): }) return serializer.encode_record_with_schema('test_subject', schema, value) +# Since everything is async and shaky when receiving messages from Kafka, +# we may want to try and check results multiple times in a loop. +def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + +def describe_consumer_group(name): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + consumer_groups = admin_client.describe_consumer_groups([name]) + res = [] + for member in consumer_groups[0].members: + member_info = {} + member_info['member_id'] = member.member_id + member_info['client_id'] = member.client_id + member_info['client_host'] = member.client_host + member_topics_assignment = [] + for (topic, partitions) in member.member_assignment.assignment: + member_topics_assignment.append({'topic': topic, 'partitions': partitions}) + member_info['assignment'] = member_topics_assignment + res.append(member_info) + return res + +# Fixtures + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + global kafka_id + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + + finally: + cluster.shutdown() + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query('DROP DATABASE IF EXISTS test; CREATE DATABASE test;') + wait_kafka_is_available() # ensure kafka is alive + kafka_producer_send_heartbeat_msg() # ensure python kafka client is ok + # print("kafka is available - running test") + yield # run test + +# Tests + +@pytest.mark.timeout(180) +def test_kafka_settings_old_syntax(kafka_cluster): + assert TSV(instance.query("SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", + ignore_error=True)) == TSV('''kafka_broker kafka1 +kafka_client_id instance +kafka_format_json_each_row JSONEachRow +kafka_group_name_new new +kafka_group_name_old old +kafka_topic_new new +kafka_topic_old old +''') + + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n'); + ''') + + # Don't insert malformed messages since old settings syntax + # doesn't support skipping of broken messages. 
+ messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('old', messages) + + result = '' + while True: + result += instance.query('SELECT * FROM test.kafka', ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + members = describe_consumer_group('old') + assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka' + # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) + + +@pytest.mark.timeout(180) +def test_kafka_settings_new_syntax(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = '{kafka_broker}:19092', + kafka_topic_list = '{kafka_topic_new}', + kafka_group_name = '{kafka_group_name_new}', + kafka_format = '{kafka_format_json_each_row}', + kafka_row_delimiter = '\\n', + kafka_client_id = '{kafka_client_id} test 1234', + kafka_skip_broken_messages = 1; + ''') + + messages = [] + for i in range(25): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('new', messages) + + # Insert couple of malformed messages. + kafka_produce('new', ['}{very_broken_message,']) + kafka_produce('new', ['}another{very_broken_message,']) + + messages = [] + for i in range(25, 50): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('new', messages) + + result = '' + while True: + result += instance.query('SELECT * FROM test.kafka', ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + members = describe_consumer_group('new') + assert members[0]['client_id'] == 'instance test 1234' + @pytest.mark.timeout(180) def test_kafka_json_as_string(kafka_cluster): @@ -183,7 +311,7 @@ def test_kafka_json_as_string(kafka_cluster): "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows") -@pytest.mark.timeout(300) +@pytest.mark.timeout(120) def test_kafka_formats(kafka_cluster): # data was dumped from clickhouse itself in a following manner # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' @@ -311,7 +439,7 @@ def test_kafka_formats(kafka_cluster): # On empty message exception happens: Line "" doesn't match the regexp.: (at row 1) # /src/Processors/Formats/Impl/RegexpRowInputFormat.cpp:140: DB::RegexpRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df82fcb in /usr/bin/clickhouse ], - 'extra_settings': ", format_regexp='\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)', format_regexp_escaping_rule='Escaped'" + 'extra_settings': r", format_regexp='\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)', format_regexp_escaping_rule='Escaped'" }, ## BINARY FORMATS @@ -545,7 +673,7 @@ def test_kafka_formats(kafka_cluster): '''.format(topic_name=topic_name, format_name=format_name, extra_settings=format_opts.get('extra_settings') or '')) - time.sleep(12) + instance.wait_for_log_line('kafka.*Committed offset [0-9]+.*format_tests_', repetitions=len(all_formats.keys()), look_behind_lines=12000) for format_name, format_opts in 
list(all_formats.items()): print(('Checking {}'.format(format_name))) @@ -574,148 +702,6 @@ def test_kafka_formats(kafka_cluster): '''.format(topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], offset_2=offsets[2]) assert TSV(result) == TSV(expected), 'Proper result for format: {}'.format(format_name) - -# Since everything is async and shaky when receiving messages from Kafka, -# we may want to try and check results multiple times in a loop. -def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -# https://stackoverflow.com/a/57692111/1555175 -def describe_consumer_group(name): - client = BrokerConnection('localhost', 9092, socket.AF_INET) - client.connect_blocking() - - list_members_in_groups = DescribeGroupsRequest_v1(groups=[name]) - future = client.send(list_members_in_groups) - while not future.is_done: - for resp, f in client.recv(): - f.success(resp) - - (error_code, group_id, state, protocol_type, protocol, members) = future.value.groups[0] - - res = [] - for member in members: - (member_id, client_id, client_host, member_metadata, member_assignment) = member - member_info = {} - member_info['member_id'] = member_id - member_info['client_id'] = client_id - member_info['client_host'] = client_host - member_topics_assignment = [] - for (topic, partitions) in MemberAssignment.decode(member_assignment).assignment: - member_topics_assignment.append({'topic': topic, 'partitions': partitions}) - member_info['assignment'] = member_topics_assignment - res.append(member_info) - return res - - -# Fixtures - -@pytest.fixture(scope="module") -def kafka_cluster(): - try: - global kafka_id - cluster.start() - kafka_id = instance.cluster.kafka_docker_id - print(("kafka_id is {}".format(kafka_id))) - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def kafka_setup_teardown(): - instance.query('DROP DATABASE IF EXISTS test; CREATE DATABASE test;') - wait_kafka_is_available() - # print("kafka is available - running test") - yield # run test - - -# Tests - -@pytest.mark.timeout(180) -def test_kafka_settings_old_syntax(kafka_cluster): - assert TSV(instance.query("SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", - ignore_error=True)) == TSV('''kafka_broker kafka1 -kafka_client_id instance -kafka_format_json_each_row JSONEachRow -kafka_group_name_new new -kafka_group_name_old old -kafka_topic_new new -kafka_topic_old old -''') - - instance.query(''' - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n'); - ''') - - # Don't insert malformed messages since old settings syntax - # doesn't support skipping of broken messages. 
- messages = [] - for i in range(50): - messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('old', messages) - - result = '' - while True: - result += instance.query('SELECT * FROM test.kafka', ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - members = describe_consumer_group('old') - assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka' - # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) - - -@pytest.mark.timeout(180) -def test_kafka_settings_new_syntax(kafka_cluster): - instance.query(''' - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = '{kafka_broker}:19092', - kafka_topic_list = '{kafka_topic_new}', - kafka_group_name = '{kafka_group_name_new}', - kafka_format = '{kafka_format_json_each_row}', - kafka_row_delimiter = '\\n', - kafka_client_id = '{kafka_client_id} test 1234', - kafka_skip_broken_messages = 1; - ''') - - messages = [] - for i in range(25): - messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('new', messages) - - # Insert couple of malformed messages. - kafka_produce('new', ['}{very_broken_message,']) - kafka_produce('new', ['}another{very_broken_message,']) - - messages = [] - for i in range(25, 50): - messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('new', messages) - - result = '' - while True: - result += instance.query('SELECT * FROM test.kafka', ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - members = describe_consumer_group('new') - assert members[0]['client_id'] == 'instance test 1234' - - @pytest.mark.timeout(180) def test_kafka_issue11308(kafka_cluster): # Check that matview does respect Kafka SETTINGS @@ -796,6 +782,12 @@ def test_kafka_issue4116(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_consumer_hang(kafka_cluster): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + + topic_list = [] + topic_list.append(NewTopic(name="consumer_hang", num_partitions=8, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + instance.query(''' DROP TABLE IF EXISTS test.kafka; DROP TABLE IF EXISTS test.view; @@ -807,20 +799,18 @@ def test_kafka_consumer_hang(kafka_cluster): kafka_topic_list = 'consumer_hang', kafka_group_name = 'consumer_hang', kafka_format = 'JSONEachRow', - kafka_num_consumers = 8, - kafka_row_delimiter = '\\n'; + kafka_num_consumers = 8; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory(); CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka; ''') - time.sleep(10) - instance.query('SELECT * FROM test.view') + instance.wait_for_log_line('kafka.*Stalled', repetitions=20) # This should trigger heartbeat fail, # which will trigger REBALANCE_IN_PROGRESS, # and which can lead to consumer hang. 
kafka_cluster.pause_container('kafka1') - time.sleep(0.5) + instance.wait_for_log_line('heartbeat error') kafka_cluster.unpause_container('kafka1') # print("Attempt to drop") @@ -844,6 +834,12 @@ def test_kafka_consumer_hang(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_consumer_hang2(kafka_cluster): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + + topic_list = [] + topic_list.append(NewTopic(name="consumer_hang2", num_partitions=1, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + instance.query(''' DROP TABLE IF EXISTS test.kafka; @@ -884,23 +880,22 @@ def test_kafka_consumer_hang2(kafka_cluster): assert int(instance.query("select count() from system.processes where position(lower(query),'dr'||'op')>0")) == 0 -@pytest.mark.timeout(180) +@pytest.mark.timeout(120) def test_kafka_csv_with_delimiter(kafka_cluster): + messages = [] + for i in range(50): + messages.append('{i}, {i}'.format(i=i)) + kafka_produce('csv', messages) + instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'csv', kafka_group_name = 'csv', - kafka_format = 'CSV', - kafka_row_delimiter = '\\n'; + kafka_format = 'CSV'; ''') - messages = [] - for i in range(50): - messages.append('{i}, {i}'.format(i=i)) - kafka_produce('csv', messages) - result = '' while True: result += instance.query('SELECT * FROM test.kafka', ignore_error=True) @@ -910,23 +905,22 @@ def test_kafka_csv_with_delimiter(kafka_cluster): kafka_check_result(result, True) -@pytest.mark.timeout(180) +@pytest.mark.timeout(120) def test_kafka_tsv_with_delimiter(kafka_cluster): + messages = [] + for i in range(50): + messages.append('{i}\t{i}'.format(i=i)) + kafka_produce('tsv', messages) + instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'tsv', kafka_group_name = 'tsv', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + kafka_format = 'TSV'; ''') - messages = [] - for i in range(50): - messages.append('{i}\t{i}'.format(i=i)) - kafka_produce('tsv', messages) - result = '' while True: result += instance.query('SELECT * FROM test.kafka', ignore_error=True) @@ -936,8 +930,13 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_check_result(result, True) -@pytest.mark.timeout(180) +@pytest.mark.timeout(120) def test_kafka_select_empty(kafka_cluster): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + topic_list = [] + topic_list.append(NewTopic(name="empty", num_partitions=1, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + instance.query(''' CREATE TABLE test.kafka (key UInt64) ENGINE = Kafka @@ -953,15 +952,6 @@ def test_kafka_select_empty(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_json_without_delimiter(kafka_cluster): - instance.query(''' - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', - kafka_format = 'JSONEachRow'; - ''') - messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' @@ -972,6 +962,15 @@ def test_kafka_json_without_delimiter(kafka_cluster): messages += json.dumps({'key': i, 'value': i}) + '\n' kafka_produce('json', [messages]) + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + 
SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; + ''') + result = '' while True: result += instance.query('SELECT * FROM test.kafka', ignore_error=True) @@ -983,6 +982,10 @@ def test_kafka_json_without_delimiter(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_protobuf(kafka_cluster): + kafka_produce_protobuf_messages('pb', 0, 20) + kafka_produce_protobuf_messages('pb', 20, 1) + kafka_produce_protobuf_messages('pb', 21, 29) + instance.query(''' CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka @@ -993,10 +996,6 @@ def test_kafka_protobuf(kafka_cluster): kafka_schema = 'kafka.proto:KeyValuePair'; ''') - kafka_produce_protobuf_messages('pb', 0, 20) - kafka_produce_protobuf_messages('pb', 20, 1) - kafka_produce_protobuf_messages('pb', 21, 29) - result = '' while True: result += instance.query('SELECT * FROM test.kafka', ignore_error=True) @@ -1009,6 +1008,9 @@ def test_kafka_protobuf(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster): # https://github.com/ClickHouse/ClickHouse/issues/12615 + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 0, 20) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 20, 1) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 21, 29) instance.query(''' CREATE TABLE test.kafka ( @@ -1021,14 +1023,8 @@ SETTINGS kafka_group_name = 'string_field_on_first_position_in_protobuf', kafka_format = 'Protobuf', kafka_schema = 'social:User'; - - SELECT * FROM test.kafka; ''') - kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 0, 20) - kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 20, 1) - kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 21, 29) - result = instance.query('SELECT * FROM test.kafka', ignore_error=True) expected = '''\ John Doe 0 1000000 @@ -1170,7 +1166,7 @@ def test_kafka_materialized_view(kafka_cluster): kafka_check_result(result, True) @pytest.mark.timeout(180) -def test_librdkafka_snappy_regression(kafka_cluster): +def test_librdkafka_compression(kafka_cluster): """ Regression for UB in snappy-c (that is used in librdkafka), backport pr is [1]. 
@@ -1180,55 +1176,63 @@ def test_librdkafka_snappy_regression(kafka_cluster):
     Example of corruption:
         2020.12.10 09:59:56.831507 [ 20 ] {} void DB::StorageKafka::threadFunc(size_t): Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected '"' before: 'foo"}': (while reading the value of key value): (at row 1)
-, Stack trace (when copying this message, always include the lines below):
+
+    To trigger this regression there should be duplicated messages
+
+    Original reproducer is:
+    $ gcc --version |& fgrep gcc
+    gcc (GCC) 10.2.0
+    $ yes foobarbaz | fold -w 80 | head -n10 >| in-…
+    $ make clean && make CFLAGS='-Wall -g -O2 -ftree-loop-vectorize -DNDEBUG=1 -DSG=1 -fPIC'
+    $ ./verify in
+    final comparision of in failed at 20 of 100
+
     """
-    # create topic with snappy compression
-    admin_client = admin.AdminClient({'bootstrap.servers': 'localhost:9092'})
-    topic_snappy = admin.NewTopic(topic='snappy_regression', num_partitions=1, replication_factor=1, config={
-        'compression.type': 'snappy',
-    })
-    admin_client.create_topics(new_topics=[topic_snappy], validate_only=False)
-
-    instance.query('''
-        CREATE TABLE test.kafka (key UInt64, value String)
-            ENGINE = Kafka
-            SETTINGS kafka_broker_list = 'kafka1:19092',
-                     kafka_topic_list = 'snappy_regression',
-                     kafka_group_name = 'ch_snappy_regression',
-                     kafka_format = 'JSONEachRow';
-        ''')
+    supported_compression_types = ['gzip', 'snappy', 'lz4', 'zstd', 'uncompressed']

     messages = []
     expected = []
-    # To trigger this regression there should duplicated messages
-    # Orignal reproducer is:
-    #
-    # $ gcc --version |& fgrep gcc
-    # gcc (GCC) 10.2.0
-    # $ yes foobarbaz | fold -w 80 | head -n10 >| in-…
-    # $ make clean && make CFLAGS='-Wall -g -O2 -ftree-loop-vectorize -DNDEBUG=1 -DSG=1 -fPIC'
-    # $ ./verify in
-    # final comparision of in failed at 20 of 100
+    value = 'foobarbaz'*10
     number_of_messages = 50
     for i in range(number_of_messages):
         messages.append(json.dumps({'key': i, 'value': value}))
         expected.append(f'{i}\t{value}')
-    kafka_produce('snappy_regression', messages)

     expected = '\n'.join(expected)

-    while True:
-        result = instance.query('SELECT * FROM test.kafka')
-        rows = len(result.strip('\n').split('\n'))
-        print(rows)
-        if rows == number_of_messages:
-            break
+    for compression_type in supported_compression_types:
+        print(('Check compression {}'.format(compression_type)))

-    assert TSV(result) == TSV(expected)
+        topic_name = 'test_librdkafka_compression_{}'.format(compression_type)
+        admin_client = admin.AdminClient({'bootstrap.servers': 'localhost:9092'})
+        topic = admin.NewTopic(topic=topic_name, num_partitions=1, replication_factor=1, config={
+            'compression.type': compression_type,
+        })
+        admin_client.create_topics(new_topics=[topic], validate_only=False)

-    instance.query('DROP TABLE test.kafka')
+        instance.query('''
+            CREATE TABLE test.kafka (key UInt64, value String)
+                ENGINE = Kafka
+                SETTINGS kafka_broker_list = 'kafka1:19092',
+                         kafka_topic_list = '{topic_name}',
+                         kafka_group_name = '{topic_name}_group',
+                         kafka_format = 'JSONEachRow',
+                         kafka_flush_interval_ms = 1000;
+            CREATE MATERIALIZED VIEW test.consumer Engine=Log AS
+                SELECT * FROM test.kafka;
+        '''.format(topic_name=topic_name) )
+
+        kafka_produce(topic_name, messages)
+
+        instance.wait_for_log_line("Committed offset {}".format(number_of_messages))
+
+        result = instance.query('SELECT * FROM test.consumer')
+        assert TSV(result) == TSV(expected)
+
+        instance.query('DROP TABLE test.kafka SYNC')
+        instance.query('DROP TABLE test.consumer SYNC')

 @pytest.mark.timeout(180)
 def
test_kafka_materialized_view_with_subquery(kafka_cluster): @@ -1577,9 +1581,6 @@ def test_kafka_commit_on_block_write(kafka_cluster): DROP TABLE test.kafka; ''') - while int(instance.query("SELECT count() FROM system.tables WHERE database='test' AND name='kafka'")) == 1: - time.sleep(1) - instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka @@ -1874,7 +1875,8 @@ def test_kafka_lot_of_partitions_partial_commit_of_bulk(kafka_cluster): kafka_topic_list = 'topic_with_multiple_partitions2', kafka_group_name = 'topic_with_multiple_partitions2', kafka_format = 'JSONEachRow', - kafka_max_block_size = 211; + kafka_max_block_size = 211, + kafka_flush_interval_ms = 500; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -1892,7 +1894,7 @@ def test_kafka_lot_of_partitions_partial_commit_of_bulk(kafka_cluster): messages.append("\n".join(rows)) kafka_produce('topic_with_multiple_partitions2', messages) - time.sleep(30) + instance.wait_for_log_line('kafka.*Stalled', repetitions=5) result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.view') print(result) @@ -1961,7 +1963,8 @@ def test_kafka_rebalance(kafka_cluster): kafka_topic_list = 'topic_with_multiple_partitions', kafka_group_name = 'rebalance_test_group', kafka_format = 'JSONEachRow', - kafka_max_block_size = 33; + kafka_max_block_size = 33, + kafka_flush_interval_ms = 500; CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS SELECT key, @@ -1975,21 +1978,15 @@ def test_kafka_rebalance(kafka_cluster): FROM test.{0}; '''.format(table_name)) # kafka_cluster.open_bash_shell('instance') - while int( - instance.query("SELECT count() FROM test.destination WHERE _consumed_by='{}'".format(table_name))) == 0: - print(("Waiting for test.kafka_consumer{} to start consume".format(consumer_index))) - time.sleep(1) + # Waiting for test.kafka_consumerX to start consume ... + instance.wait_for_log_line('kafka_consumer{}.*Polled offset [0-9]+'.format(consumer_index)) cancel.set() # I leave last one working by intent (to finish consuming after all rebalances) for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS - 1): print(("Dropping test.kafka_consumer{}".format(consumer_index))) - instance.query('DROP TABLE IF EXISTS test.kafka_consumer{}'.format(consumer_index)) - while int(instance.query( - "SELECT count() FROM system.tables WHERE database='test' AND name='kafka_consumer{}'".format( - consumer_index))) == 1: - time.sleep(1) + instance.query('DROP TABLE IF EXISTS test.kafka_consumer{} SYNC'.format(consumer_index)) # print(instance.query('SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination')) # kafka_cluster.open_bash_shell('instance') @@ -2042,9 +2039,9 @@ def test_kafka_rebalance(kafka_cluster): assert result == 1, 'Messages from kafka get duplicated!' 
-@pytest.mark.timeout(1200) +@pytest.mark.timeout(120) def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): - messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(1)] + messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(22)] kafka_produce('no_holes_when_write_suffix_failed', messages) instance.query(''' @@ -2060,39 +2057,28 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): kafka_max_block_size = 20, kafka_flush_interval_ms = 2000; - SELECT * FROM test.kafka LIMIT 1; /* do subscription & assignment in advance (it can take different time, test rely on timing, so can flap otherwise) */ + CREATE TABLE test.view (key UInt64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/no_holes_when_write_suffix_failed', 'node1') + ORDER BY key; ''') - messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(22)] - kafka_produce('no_holes_when_write_suffix_failed', messages) - # init PartitionManager (it starts container) earlier pm = PartitionManager() instance.query(''' - CREATE TABLE test.view (key UInt64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/no_holes_when_write_suffix_failed', 'node1') - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka - WHERE NOT sleepEachRow(1); + WHERE NOT sleepEachRow(0.25); ''') + instance.wait_for_log_line("Polled batch of 20 messages") # the tricky part here is that disconnect should happen after write prefix, but before write suffix - # so i use sleepEachRow - - time.sleep(3) + # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" + # while materialized view is working to inject zookeeper failure pm.drop_instance_zk_connections(instance) - time.sleep(20) + instance.wait_for_log_line("Error.*(session has been expired|Connection loss).*while write prefix to view") pm.heal_all() - - # connection restored and it will take a while until next block will be flushed - # it takes years on CI :\ - time.sleep(45) - - # as it's a bit tricky to hit the proper moment - let's check in logs if we did it correctly - assert instance.contains_in_log("ZooKeeper session has been expired.: while write prefix to view") + instance.wait_for_log_line("Committed offset 22") result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.view') print(result) @@ -2146,7 +2132,7 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): kafka_produce('commits_of_unprocessed_messages_on_drop', messages) instance.query(''' - DROP TABLE IF EXISTS test.destination; + DROP TABLE IF EXISTS test.destination SYNC; CREATE TABLE test.destination ( key UInt64, value UInt64, @@ -2166,7 +2152,8 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', kafka_format = 'JSONEachRow', - kafka_max_block_size = 1000; + kafka_max_block_size = 1000, + kafka_flush_interval_ms = 1000; CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS SELECT @@ -2180,9 +2167,8 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): FROM test.kafka; ''') - while int(instance.query("SELECT count() FROM test.destination")) == 0: - print("Waiting for test.kafka_consumer to start consume") - time.sleep(1) + # Waiting for test.kafka_consumer to start consume + instance.wait_for_log_line('Committed offset [0-9]+') cancel = 
threading.Event() @@ -2195,14 +2181,14 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 kafka_produce('commits_of_unprocessed_messages_on_drop', messages) - time.sleep(1) + time.sleep(0.5) kafka_thread = threading.Thread(target=produce) kafka_thread.start() - time.sleep(12) + time.sleep(4) instance.query(''' - DROP TABLE test.kafka; + DROP TABLE test.kafka SYNC; ''') instance.query(''' @@ -2212,11 +2198,12 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): kafka_topic_list = 'commits_of_unprocessed_messages_on_drop', kafka_group_name = 'commits_of_unprocessed_messages_on_drop_test_group', kafka_format = 'JSONEachRow', - kafka_max_block_size = 10000; + kafka_max_block_size = 10000, + kafka_flush_interval_ms = 1000; ''') cancel.set() - time.sleep(15) + instance.wait_for_log_line('kafka.*Stalled', repetitions=5) # kafka_cluster.open_bash_shell('instance') # SELECT key, _timestamp, _offset FROM test.destination where runningDifference(key) <> 1 ORDER BY key; @@ -2225,8 +2212,8 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): print(result) instance.query(''' - DROP TABLE test.kafka_consumer; - DROP TABLE test.destination; + DROP TABLE test.kafka_consumer SYNC; + DROP TABLE test.destination SYNC; ''') kafka_thread.join() @@ -2245,7 +2232,8 @@ def test_bad_reschedule(kafka_cluster): kafka_topic_list = 'test_bad_reschedule', kafka_group_name = 'test_bad_reschedule', kafka_format = 'JSONEachRow', - kafka_max_block_size = 1000; + kafka_max_block_size = 1000, + kafka_flush_interval_ms = 1000; CREATE MATERIALIZED VIEW test.destination Engine=Log AS SELECT @@ -2260,21 +2248,19 @@ def test_bad_reschedule(kafka_cluster): FROM test.kafka; ''') - while int(instance.query("SELECT count() FROM test.destination")) < 20000: - print("Waiting for consume") - time.sleep(1) + instance.wait_for_log_line("Committed offset 20000") assert int(instance.query("SELECT max(consume_ts) - min(consume_ts) FROM test.destination")) < 8 @pytest.mark.timeout(300) def test_kafka_duplicates_when_commit_failed(kafka_cluster): - messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(1)] + messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(22)] kafka_produce('duplicates_when_commit_failed', messages) instance.query(''' - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; + DROP TABLE IF EXISTS test.view SYNC; + DROP TABLE IF EXISTS test.consumer SYNC; CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka @@ -2285,51 +2271,42 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster): kafka_max_block_size = 20, kafka_flush_interval_ms = 1000; - SELECT * FROM test.kafka LIMIT 1; /* do subscription & assignment in advance (it can take different time, test rely on timing, so can flap otherwise) */ - ''') - - messages = [json.dumps({'key': j + 1, 'value': 'x' * 300}) for j in range(22)] - kafka_produce('duplicates_when_commit_failed', messages) - - instance.query(''' CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree() ORDER BY key; - - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka - WHERE NOT sleepEachRow(0.5); ''') - # print time.strftime("%m/%d/%Y %H:%M:%S") - time.sleep(3) # MV will work for 10 sec, after that commit should happen, we want to pause before + instance.query(''' + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka + WHERE NOT 
sleepEachRow(0.25); + ''') - # print time.strftime("%m/%d/%Y %H:%M:%S") + instance.wait_for_log_line("Polled batch of 20 messages") + # the tricky part here is that disconnect should happen after write prefix, but before we do commit + # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" + # while materialized view is working to inject zookeeper failure kafka_cluster.pause_container('kafka1') - # that timeout it VERY important, and picked after lot of experiments - # when too low (<30sec) librdkafka will not report any timeout (alternative is to decrease the default session timeouts for librdkafka) - # when too high (>50sec) broker will decide to remove us from the consumer group, and will start answering "Broker: Unknown member" - time.sleep(42) - # print time.strftime("%m/%d/%Y %H:%M:%S") + # if we restore the connection too fast (<30sec) librdkafka will not report any timeout + # (alternative is to decrease the default session timeouts for librdkafka) + # + # when the delay is too long (>50sec) broker will decide to remove us from the consumer group, + # and will start answering "Broker: Unknown member" + instance.wait_for_log_line("Exception during commit attempt: Local: Waiting for coordinator", timeout=45) + instance.wait_for_log_line("All commit attempts failed", look_behind_lines=500) + kafka_cluster.unpause_container('kafka1') # kafka_cluster.open_bash_shell('instance') - - # connection restored and it will take a while until next block will be flushed - # it takes years on CI :\ - time.sleep(30) - - # as it's a bit tricky to hit the proper moment - let's check in logs if we did it correctly - assert instance.contains_in_log("Local: Waiting for coordinator") - assert instance.contains_in_log("All commit attempts failed") + instance.wait_for_log_line("Committed offset 22") result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.view') print(result) instance.query(''' - DROP TABLE test.consumer; - DROP TABLE test.view; + DROP TABLE test.consumer SYNC; + DROP TABLE test.view SYNC; ''') # After https://github.com/edenhill/librdkafka/issues/2631 @@ -2392,9 +2369,8 @@ def test_premature_flush_on_eof(kafka_cluster): # all subscriptions/assignments done during select, so it start sending data to test.destination # immediately after creation of MV - time.sleep(1.5) # that sleep is needed to ensure that first poll finished, and at least one 'empty' polls happened. - # Empty poll before the fix were leading to premature flush. - # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled" + instance.wait_for_log_line("Polled batch of 1 messages") + instance.wait_for_log_line("Stalled") # produce more messages after delay kafka_produce('premature_flush_on_eof', messages) @@ -2402,7 +2378,7 @@ def test_premature_flush_on_eof(kafka_cluster): # data was not flushed yet (it will be flushed 7.5 sec after creating MV) assert int(instance.query("SELECT count() FROM test.destination")) == 0 - time.sleep(9) # TODO: wait for messages in log: "Committed offset ..." + instance.wait_for_log_line("Committed offset 2") # it should be single part, i.e. 
single insert
    result = instance.query('SELECT _part, count() FROM test.destination group by _part')
@@ -2414,10 +2390,10 @@ def test_premature_flush_on_eof(kafka_cluster):
    ''')

-@pytest.mark.timeout(180)
+@pytest.mark.timeout(120)
 def test_kafka_unavailable(kafka_cluster):
-    messages = [json.dumps({'key': j + 1, 'value': j + 1}) for j in range(20000)]
-    kafka_produce('test_bad_reschedule', messages)
+    messages = [json.dumps({'key': j + 1, 'value': j + 1}) for j in range(2000)]
+    kafka_produce('test_kafka_unavailable', messages)

     kafka_cluster.pause_container('kafka1')
@@ -2425,10 +2401,11 @@ def test_kafka_unavailable(kafka_cluster):
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS kafka_broker_list = 'kafka1:19092',
-                     kafka_topic_list = 'test_bad_reschedule',
-                     kafka_group_name = 'test_bad_reschedule',
+                     kafka_topic_list = 'test_kafka_unavailable',
+                     kafka_group_name = 'test_kafka_unavailable',
                      kafka_format = 'JSONEachRow',
-                     kafka_max_block_size = 1000;
+                     kafka_max_block_size = 1000,
+                     kafka_flush_interval_ms = 1000;

        CREATE MATERIALIZED VIEW test.destination Engine=Log AS
        SELECT
@@ -2444,19 +2421,22 @@ def test_kafka_unavailable(kafka_cluster):
    ''')

    instance.query("SELECT * FROM test.kafka")
-    instance.query("SELECT count() FROM test.destination")
-    # enough to trigger issue
-    time.sleep(30)
+    instance.wait_for_log_line('brokers are down')
+    instance.wait_for_log_line('stalled. Reschedule', repetitions=2)
+
     kafka_cluster.unpause_container('kafka1')

-    while int(instance.query("SELECT count() FROM test.destination")) < 20000:
-        print("Waiting for consume")
-        time.sleep(1)
-
+    instance.wait_for_log_line("Committed offset 2000")
+    assert int(instance.query("SELECT count() FROM test.destination")) == 2000
+    time.sleep(5) # needed to give time for the kafka client in the python test to recover

 @pytest.mark.timeout(180)
 def test_kafka_issue14202(kafka_cluster):
+    """
+    INSERT INTO Kafka Engine from an empty SELECT subquery was leading to failure
+    """
+
     instance.query('''
        CREATE TABLE test.empty_table (
            dt Date,
@@ -2474,8 +2454,6 @@ def test_kafka_issue14202(kafka_cluster):
            kafka_format = 'JSONEachRow';
        ''')

-    time.sleep(3)
-
    instance.query(
        'INSERT INTO test.kafka_q SELECT t, some_string FROM ( SELECT dt AS t, some_string FROM test.empty_table )')
    # check instance is alive
diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py
index 4f567c19f2b..cee495438a2 100644
--- a/tests/integration/test_storage_postgresql/test.py
+++ b/tests/integration/test_storage_postgresql/test.py
@@ -63,13 +63,13 @@ def test_postgres_conversions(started_cluster):
     cursor.execute(
        '''CREATE TABLE IF NOT EXISTS test_types (
        a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial,
-        h timestamp, i date, j numeric(5, 5), k decimal(5, 5))''')
+        h timestamp, i date, j decimal(5, 3), k numeric)''')
     node1.query('''
        INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword') VALUES
-        (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12', '2000-05-12', 0.2, 0.2)''')
+        (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12', '2000-05-12', 22.222, 22.222)''')
     result = node1.query('''
-        SELECT * FROM postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword')''')
-    assert(result ==
'-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\t2000-05-12\t0.20000\t0.20000\n') + SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3) FROM postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword')''') + assert(result == '-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\t2000-05-12\t22.222\t22.222\n') cursor.execute( '''CREATE TABLE IF NOT EXISTS test_array_dimensions diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml index 1a441909998..2cf9f8022d1 100644 --- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml +++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml @@ -2,11 +2,13 @@ 9181 1 + /var/lib/clickhouse/coordination/log 5000 10000 trace + false diff --git a/tests/integration/test_testkeeper_back_to_back/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/use_test_keeper.xml new file mode 100644 index 00000000000..12dc7fd9447 --- /dev/null +++ b/tests/integration/test_testkeeper_back_to_back/configs/use_test_keeper.xml @@ -0,0 +1,8 @@ + + + + node1 + 9181 + + + diff --git a/tests/integration/test_testkeeper_back_to_back/test.py b/tests/integration/test_testkeeper_back_to_back/test.py index 8ec54f1a883..dd4e1f98cfd 100644 --- a/tests/integration/test_testkeeper_back_to_back/test.py +++ b/tests/integration/test_testkeeper_back_to_back/test.py @@ -8,32 +8,23 @@ from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml'], with_zookeeper=True) -from kazoo.client import KazooClient, KazooState - -_genuine_zk_instance = None -_fake_zk_instance = None +from kazoo.client import KazooClient, KazooState, KeeperState def get_genuine_zk(): - global _genuine_zk_instance - if not _genuine_zk_instance: - print("Zoo1", cluster.get_instance_ip("zoo1")) - _genuine_zk_instance = cluster.get_kazoo_client('zoo1') - return _genuine_zk_instance - + print("Zoo1", cluster.get_instance_ip("zoo1")) + return cluster.get_kazoo_client('zoo1') def get_fake_zk(): - global _fake_zk_instance - if not _fake_zk_instance: - print("node", cluster.get_instance_ip("node")) - _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181", timeout=30.0) - def reset_last_zxid_listener(state): - print("Fake zk callback called for state", state) - global _fake_zk_instance - if state != KazooState.CONNECTED: - _fake_zk_instance._reset() + print("node", cluster.get_instance_ip("node")) + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181", timeout=30.0) + def reset_last_zxid_listener(state): + print("Fake zk callback called for state", state) + nonlocal _fake_zk_instance + if state != KazooState.CONNECTED: + _fake_zk_instance._reset() - _fake_zk_instance.add_listener(reset_last_zxid_listener) - _fake_zk_instance.start() + _fake_zk_instance.add_listener(reset_last_zxid_listener) + _fake_zk_instance.start() return _fake_zk_instance def random_string(length): @@ -44,6 +35,15 @@ def create_random_path(prefix="", depth=1): return prefix return create_random_path(os.path.join(prefix, random_string(3)), depth - 1) +def stop_zk(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + 
+ @pytest.fixture(scope="module") def started_cluster(): try: @@ -53,44 +53,46 @@ def started_cluster(): finally: cluster.shutdown() - if _genuine_zk_instance: - _genuine_zk_instance.stop() - _genuine_zk_instance.close() - if _fake_zk_instance: - _fake_zk_instance.stop() - _fake_zk_instance.close() def test_simple_commands(started_cluster): - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() + try: + genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() - for zk in [genuine_zk, fake_zk]: - zk.create("/test_simple_commands", b"") - zk.create("/test_simple_commands/somenode1", b"hello") - zk.set("/test_simple_commands/somenode1", b"world") + for zk in [genuine_zk, fake_zk]: + zk.create("/test_simple_commands", b"") + zk.create("/test_simple_commands/somenode1", b"hello") + zk.set("/test_simple_commands/somenode1", b"world") - for zk in [genuine_zk, fake_zk]: - assert zk.exists("/test_simple_commands") - assert zk.exists("/test_simple_commands/somenode1") - print(zk.get("/test_simple_commands/somenode1")) - assert zk.get("/test_simple_commands/somenode1")[0] == b"world" + for zk in [genuine_zk, fake_zk]: + assert zk.exists("/test_simple_commands") + assert zk.exists("/test_simple_commands/somenode1") + print(zk.get("/test_simple_commands/somenode1")) + assert zk.get("/test_simple_commands/somenode1")[0] == b"world" + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def test_sequential_nodes(started_cluster): - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() - genuine_zk.create("/test_sequential_nodes") - fake_zk.create("/test_sequential_nodes") - for i in range(1, 11): - genuine_zk.create("/test_sequential_nodes/" + ("a" * i) + "-", sequence=True) - genuine_zk.create("/test_sequential_nodes/" + ("b" * i)) - fake_zk.create("/test_sequential_nodes/" + ("a" * i) + "-", sequence=True) - fake_zk.create("/test_sequential_nodes/" + ("b" * i)) + try: + genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() + genuine_zk.create("/test_sequential_nodes") + fake_zk.create("/test_sequential_nodes") + for i in range(1, 11): + genuine_zk.create("/test_sequential_nodes/" + ("a" * i) + "-", sequence=True) + genuine_zk.create("/test_sequential_nodes/" + ("b" * i)) + fake_zk.create("/test_sequential_nodes/" + ("a" * i) + "-", sequence=True) + fake_zk.create("/test_sequential_nodes/" + ("b" * i)) - genuine_childs = list(sorted(genuine_zk.get_children("/test_sequential_nodes"))) - fake_childs = list(sorted(fake_zk.get_children("/test_sequential_nodes"))) - assert genuine_childs == fake_childs + genuine_childs = list(sorted(genuine_zk.get_children("/test_sequential_nodes"))) + fake_childs = list(sorted(fake_zk.get_children("/test_sequential_nodes"))) + assert genuine_childs == fake_childs + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def assert_eq_stats(stat1, stat2): @@ -102,130 +104,141 @@ def assert_eq_stats(stat1, stat2): assert stat1.numChildren == stat2.numChildren def test_stats(started_cluster): - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() - genuine_zk.create("/test_stats_nodes") - fake_zk.create("/test_stats_nodes") - genuine_stats = genuine_zk.exists("/test_stats_nodes") - fake_stats = fake_zk.exists("/test_stats_nodes") - assert_eq_stats(genuine_stats, fake_stats) - for i in range(1, 11): - genuine_zk.create("/test_stats_nodes/" + ("a" * i) + "-", sequence=True) - genuine_zk.create("/test_stats_nodes/" + ("b" * i)) - fake_zk.create("/test_stats_nodes/" + ("a" * i) + "-", sequence=True) - fake_zk.create("/test_stats_nodes/" + ("b" * i)) + try: + 
genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() + genuine_zk.create("/test_stats_nodes") + fake_zk.create("/test_stats_nodes") + genuine_stats = genuine_zk.exists("/test_stats_nodes") + fake_stats = fake_zk.exists("/test_stats_nodes") + assert_eq_stats(genuine_stats, fake_stats) + for i in range(1, 11): + genuine_zk.create("/test_stats_nodes/" + ("a" * i) + "-", sequence=True) + genuine_zk.create("/test_stats_nodes/" + ("b" * i)) + fake_zk.create("/test_stats_nodes/" + ("a" * i) + "-", sequence=True) + fake_zk.create("/test_stats_nodes/" + ("b" * i)) - genuine_stats = genuine_zk.exists("/test_stats_nodes") - fake_stats = fake_zk.exists("/test_stats_nodes") - assert_eq_stats(genuine_stats, fake_stats) - for i in range(1, 11): - print("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) - genuine_zk.delete("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) - genuine_zk.delete("/test_stats_nodes/" + ("b" * i)) - fake_zk.delete("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) - fake_zk.delete("/test_stats_nodes/" + ("b" * i)) + genuine_stats = genuine_zk.exists("/test_stats_nodes") + fake_stats = fake_zk.exists("/test_stats_nodes") + assert_eq_stats(genuine_stats, fake_stats) + for i in range(1, 11): + print("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) + genuine_zk.delete("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) + genuine_zk.delete("/test_stats_nodes/" + ("b" * i)) + fake_zk.delete("/test_stats_nodes/" + ("a" * i) + "-" + "{:010d}".format((i - 1) * 2)) + fake_zk.delete("/test_stats_nodes/" + ("b" * i)) - genuine_stats = genuine_zk.exists("/test_stats_nodes") - fake_stats = fake_zk.exists("/test_stats_nodes") - print(genuine_stats) - print(fake_stats) - assert_eq_stats(genuine_stats, fake_stats) - for i in range(100): - genuine_zk.set("/test_stats_nodes", ("q" * i).encode()) - fake_zk.set("/test_stats_nodes", ("q" * i).encode()) + genuine_stats = genuine_zk.exists("/test_stats_nodes") + fake_stats = fake_zk.exists("/test_stats_nodes") + print(genuine_stats) + print(fake_stats) + assert_eq_stats(genuine_stats, fake_stats) + for i in range(100): + genuine_zk.set("/test_stats_nodes", ("q" * i).encode()) + fake_zk.set("/test_stats_nodes", ("q" * i).encode()) - genuine_stats = genuine_zk.exists("/test_stats_nodes") - fake_stats = fake_zk.exists("/test_stats_nodes") - print(genuine_stats) - print(fake_stats) - assert_eq_stats(genuine_stats, fake_stats) + genuine_stats = genuine_zk.exists("/test_stats_nodes") + fake_stats = fake_zk.exists("/test_stats_nodes") + print(genuine_stats) + print(fake_stats) + assert_eq_stats(genuine_stats, fake_stats) + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def test_watchers(started_cluster): - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() - genuine_zk.create("/test_data_watches") - fake_zk.create("/test_data_watches") - genuine_data_watch_data = None + try: + genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() + genuine_zk.create("/test_data_watches") + fake_zk.create("/test_data_watches") + genuine_data_watch_data = None - def genuine_callback(event): - print("Genuine data watch called") - nonlocal genuine_data_watch_data - genuine_data_watch_data = event + def genuine_callback(event): + print("Genuine data watch called") + nonlocal genuine_data_watch_data + genuine_data_watch_data = event - fake_data_watch_data = None - def fake_callback(event): - print("Fake data watch called") - nonlocal fake_data_watch_data 
- fake_data_watch_data = event + fake_data_watch_data = None + def fake_callback(event): + print("Fake data watch called") + nonlocal fake_data_watch_data + fake_data_watch_data = event - genuine_zk.get("/test_data_watches", watch=genuine_callback) - fake_zk.get("/test_data_watches", watch=fake_callback) + genuine_zk.get("/test_data_watches", watch=genuine_callback) + fake_zk.get("/test_data_watches", watch=fake_callback) - print("Calling set genuine") - genuine_zk.set("/test_data_watches", b"a") - print("Calling set fake") - fake_zk.set("/test_data_watches", b"a") - time.sleep(3) + print("Calling set genuine") + genuine_zk.set("/test_data_watches", b"a") + print("Calling set fake") + fake_zk.set("/test_data_watches", b"a") + time.sleep(3) - print("Genuine data", genuine_data_watch_data) - print("Fake data", fake_data_watch_data) - assert genuine_data_watch_data == fake_data_watch_data + print("Genuine data", genuine_data_watch_data) + print("Fake data", fake_data_watch_data) + assert genuine_data_watch_data == fake_data_watch_data - genuine_children = None - def genuine_child_callback(event): - print("Genuine child watch called") - nonlocal genuine_children - genuine_children = event + genuine_children = None + def genuine_child_callback(event): + print("Genuine child watch called") + nonlocal genuine_children + genuine_children = event - fake_children = None - def fake_child_callback(event): - print("Fake child watch called") - nonlocal fake_children - fake_children = event + fake_children = None + def fake_child_callback(event): + print("Fake child watch called") + nonlocal fake_children + fake_children = event - genuine_zk.get_children("/test_data_watches", watch=genuine_child_callback) - fake_zk.get_children("/test_data_watches", watch=fake_child_callback) + genuine_zk.get_children("/test_data_watches", watch=genuine_child_callback) + fake_zk.get_children("/test_data_watches", watch=fake_child_callback) - print("Calling genuine child") - genuine_zk.create("/test_data_watches/child", b"b") - print("Calling fake child") - fake_zk.create("/test_data_watches/child", b"b") + print("Calling genuine child") + genuine_zk.create("/test_data_watches/child", b"b") + print("Calling fake child") + fake_zk.create("/test_data_watches/child", b"b") - time.sleep(3) + time.sleep(3) - print("Genuine children", genuine_children) - print("Fake children", fake_children) - assert genuine_children == fake_children + print("Genuine children", genuine_children) + print("Fake children", fake_children) + assert genuine_children == fake_children + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def test_multitransactions(started_cluster): - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() - for zk in [genuine_zk, fake_zk]: - zk.create('/test_multitransactions') - t = zk.transaction() - t.create('/test_multitransactions/freddy') - t.create('/test_multitransactions/fred', ephemeral=True) - t.create('/test_multitransactions/smith', sequence=True) - results = t.commit() - assert len(results) == 3 - assert results[0] == '/test_multitransactions/freddy' - assert results[2].startswith('/test_multitransactions/smith0') is True - - from kazoo.exceptions import RolledBackError, NoNodeError - for i, zk in enumerate([genuine_zk, fake_zk]): - print("Processing ZK", i) - t = zk.transaction() - t.create('/test_multitransactions/q') - t.delete('/test_multitransactions/a') - t.create('/test_multitransactions/x') - results = t.commit() - print("Results", results) - assert results[0].__class__ == RolledBackError - 
assert results[1].__class__ == NoNodeError - assert zk.exists('/test_multitransactions/q') is None - assert zk.exists('/test_multitransactions/a') is None - assert zk.exists('/test_multitransactions/x') is None + try: + genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() + for zk in [genuine_zk, fake_zk]: + zk.create('/test_multitransactions') + t = zk.transaction() + t.create('/test_multitransactions/freddy') + t.create('/test_multitransactions/fred', ephemeral=True) + t.create('/test_multitransactions/smith', sequence=True) + results = t.commit() + assert len(results) == 3 + assert results[0] == '/test_multitransactions/freddy' + assert results[2].startswith('/test_multitransactions/smith0') is True + from kazoo.exceptions import RolledBackError, NoNodeError + for i, zk in enumerate([genuine_zk, fake_zk]): + print("Processing ZK", i) + t = zk.transaction() + t.create('/test_multitransactions/q') + t.delete('/test_multitransactions/a') + t.create('/test_multitransactions/x') + results = t.commit() + print("Results", results) + assert results[0].__class__ == RolledBackError + assert results[1].__class__ == NoNodeError + assert zk.exists('/test_multitransactions/q') is None + assert zk.exists('/test_multitransactions/a') is None + assert zk.exists('/test_multitransactions/x') is None + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def exists(zk, path): result = zk.exists(path) @@ -278,13 +291,13 @@ class Request(object): arg_str = ', '.join([str(k) + "=" + str(v) for k, v in self.arguments.items()]) return "ZKRequest name {} with arguments {}".format(self.name, arg_str) -def generate_requests(iters=1): +def generate_requests(prefix="/", iters=1): requests = [] existing_paths = [] for i in range(iters): for _ in range(100): rand_length = random.randint(0, 10) - path = "/" + path = prefix for j in range(1, rand_length): path = create_random_path(path, 1) existing_paths.append(path) @@ -322,31 +335,43 @@ def generate_requests(iters=1): def test_random_requests(started_cluster): - requests = generate_requests(10) - genuine_zk = get_genuine_zk() - fake_zk = get_fake_zk() - for i, request in enumerate(requests): - genuine_throw = False - fake_throw = False - fake_result = None - genuine_result = None - try: - genuine_result = request.callback(genuine_zk) - except Exception as ex: - genuine_throw = True + try: + requests = generate_requests("/test_random_requests", 10) + print("Generated", len(requests), "requests") + genuine_zk = get_genuine_zk() + fake_zk = get_fake_zk() + genuine_zk.create("/test_random_requests") + fake_zk.create("/test_random_requests") + for i, request in enumerate(requests): + genuine_throw = False + fake_throw = False + fake_result = None + genuine_result = None + try: + genuine_result = request.callback(genuine_zk) + except Exception as ex: + print("i", i, "request", request) + print("Genuine exception", str(ex)) + genuine_throw = True - try: - fake_result = request.callback(fake_zk) - except Exception as ex: - fake_throw = True + try: + fake_result = request.callback(fake_zk) + except Exception as ex: + print("i", i, "request", request) + print("Fake exception", str(ex)) + fake_throw = True - assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa" - assert fake_result == genuine_result, "Zookeeper results differ" - root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/"))) if elem not in ('clickhouse', 'zookeeper')] - root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/"))) if elem not in 
('clickhouse', 'zookeeper')] - assert root_children_fake == root_children_genuine + assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa request {}" + assert fake_result == genuine_result, "Zookeeper results differ" + root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')] + root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')] + assert root_children_fake == root_children_genuine + finally: + for zk in [genuine_zk, fake_zk]: + stop_zk(zk) def test_end_of_session(started_cluster): + fake_zk1 = None fake_zk2 = None genuine_zk1 = None @@ -401,13 +426,8 @@ def test_end_of_session(started_cluster): assert fake_ephemeral_event == genuine_ephemeral_event finally: - try: - for zk in [fake_zk1, fake_zk2, genuine_zk1, genuine_zk2]: - if zk: - zk.stop() - zk.close() - except: - pass + for zk in [fake_zk1, fake_zk2, genuine_zk1, genuine_zk2]: + stop_zk(zk) def test_end_of_watches_session(started_cluster): fake_zk1 = None @@ -442,91 +462,89 @@ def test_end_of_watches_session(started_cluster): assert dummy_set == 2 finally: - try: - for zk in [fake_zk1, fake_zk2]: - if zk: - zk.stop() - zk.close() - except: - pass + for zk in [fake_zk1, fake_zk2]: + stop_zk(zk) def test_concurrent_watches(started_cluster): - fake_zk = get_fake_zk() - fake_zk.restart() - global_path = "/test_concurrent_watches_0" - fake_zk.create(global_path) + try: + fake_zk = get_fake_zk() + fake_zk.restart() + global_path = "/test_concurrent_watches_0" + fake_zk.create(global_path) - dumb_watch_triggered_counter = 0 - all_paths_triggered = [] + dumb_watch_triggered_counter = 0 + all_paths_triggered = [] - existing_path = [] - all_paths_created = [] - watches_created = 0 - def create_path_and_watch(i): - nonlocal watches_created - nonlocal all_paths_created - fake_zk.ensure_path(global_path + "/" + str(i)) - # new function each time - def dumb_watch(event): - nonlocal dumb_watch_triggered_counter - dumb_watch_triggered_counter += 1 - nonlocal all_paths_triggered - all_paths_triggered.append(event.path) + existing_path = [] + all_paths_created = [] + watches_created = 0 + def create_path_and_watch(i): + nonlocal watches_created + nonlocal all_paths_created + fake_zk.ensure_path(global_path + "/" + str(i)) + # new function each time + def dumb_watch(event): + nonlocal dumb_watch_triggered_counter + dumb_watch_triggered_counter += 1 + nonlocal all_paths_triggered + all_paths_triggered.append(event.path) - fake_zk.get(global_path + "/" + str(i), watch=dumb_watch) - all_paths_created.append(global_path + "/" + str(i)) - watches_created += 1 - existing_path.append(i) + fake_zk.get(global_path + "/" + str(i), watch=dumb_watch) + all_paths_created.append(global_path + "/" + str(i)) + watches_created += 1 + existing_path.append(i) - trigger_called = 0 - def trigger_watch(i): - nonlocal trigger_called - trigger_called += 1 - fake_zk.set(global_path + "/" + str(i), b"somevalue") - try: - existing_path.remove(i) - except: - pass - - def call(total): - for i in range(total): - create_path_and_watch(random.randint(0, 1000)) - time.sleep(random.random() % 0.5) + trigger_called = 0 + def trigger_watch(i): + nonlocal trigger_called + trigger_called += 1 + fake_zk.set(global_path + "/" + str(i), b"somevalue") try: - rand_num = random.choice(existing_path) - trigger_watch(rand_num) - except: - pass - while existing_path: - try: - rand_num = 
random.choice(existing_path) - trigger_watch(rand_num) + existing_path.remove(i) except: pass - p = Pool(10) - arguments = [100] * 10 - watches_must_be_created = sum(arguments) - watches_trigger_must_be_called = sum(arguments) - watches_must_be_triggered = sum(arguments) - p.map(call, arguments) - p.close() + def call(total): + for i in range(total): + create_path_and_watch(random.randint(0, 1000)) + time.sleep(random.random() % 0.5) + try: + rand_num = random.choice(existing_path) + trigger_watch(rand_num) + except: + pass + while existing_path: + try: + rand_num = random.choice(existing_path) + trigger_watch(rand_num) + except: + pass - # waiting for late watches - for i in range(50): - if dumb_watch_triggered_counter == watches_must_be_triggered: - break + p = Pool(10) + arguments = [100] * 10 + watches_must_be_created = sum(arguments) + watches_trigger_must_be_called = sum(arguments) + watches_must_be_triggered = sum(arguments) + p.map(call, arguments) + p.close() - time.sleep(0.1) + # waiting for late watches + for i in range(50): + if dumb_watch_triggered_counter == watches_must_be_triggered: + break - assert watches_created == watches_must_be_created - assert trigger_called >= watches_trigger_must_be_called - assert len(existing_path) == 0 - if dumb_watch_triggered_counter != watches_must_be_triggered: - print("All created paths", all_paths_created) - print("All triggerred paths", all_paths_triggered) - print("All paths len", len(all_paths_created)) - print("All triggered len", len(all_paths_triggered)) - print("Diff", list(set(all_paths_created) - set(all_paths_triggered))) + time.sleep(0.1) - assert dumb_watch_triggered_counter == watches_must_be_triggered + assert watches_created == watches_must_be_created + assert trigger_called >= watches_trigger_must_be_called + assert len(existing_path) == 0 + if dumb_watch_triggered_counter != watches_must_be_triggered: + print("All created paths", all_paths_created) + print("All triggerred paths", all_paths_triggered) + print("All paths len", len(all_paths_created)) + print("All triggered len", len(all_paths_triggered)) + print("Diff", list(set(all_paths_created) - set(all_paths_triggered))) + + assert dumb_watch_triggered_counter == watches_must_be_triggered + finally: + stop_zk(fake_zk) diff --git a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml index 4ad76889d1e..a47e5eae09a 100644 --- a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml +++ b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml @@ -2,6 +2,7 @@ 9181 1 + /var/lib/clickhouse/coordination/log 5000 diff --git a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml index a1954a1e639..18681f0dc95 100644 --- a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml +++ b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml @@ -2,6 +2,7 @@ 9181 2 + /var/lib/clickhouse/coordination/log 5000 diff --git a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml index 88d2358138f..184d3724219 100644 --- 
a/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml
@@ -2,6 +2,7 @@
         9181
         3
+        /var/lib/clickhouse/coordination/log
         5000
diff --git a/tests/integration/test_testkeeper_multinode_blocade_leader/test.py b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
index 3b2867ef3c7..47064413b45 100644
--- a/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
+++ b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
@@ -6,6 +6,7 @@ import os
 import time
 from multiprocessing.dummy import Pool
 from helpers.network import PartitionManager
+from helpers.test_tools import assert_eq_with_retry

 cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
@@ -14,6 +15,18 @@ node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3

 from kazoo.client import KazooClient, KazooState

+"""
+In this test, we blockade the RAFT leader and check that the whole system is
+able to recover. It's not a good test because we use ClickHouse's replicated
+tables to check connectivity, but they may require special operations (or a long
+wait) after session expiration. We don't use kazoo, because this client pretends
+to be very smart: it SUSPENDs sessions, tries to recover them, and so on. The test
+would be even less predictable than with ClickHouse tables.
+
+TODO find (or write) a not-so-smart python client.
+TODO remove this when jepsen tests are written.
+"""
+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
@@ -55,7 +68,6 @@ def get_fake_zk(nodename, timeout=30.0):
     _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
     def reset_listener(state):
         nonlocal _fake_zk_instance
-        print("Fake zk callback called for state", state)
         if state != KazooState.CONNECTED:
             _fake_zk_instance._reset()
@@ -67,19 +79,25 @@ def get_fake_zk(nodename, timeout=30.0):
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):
-    wait_nodes()
-    for i, node in enumerate([node1, node2, node3]):
-        node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
-        node.query("CREATE TABLE ordinary.t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
+    for i in range(100):
+        wait_nodes()
+        try:
+            for i, node in enumerate([node1, node2, node3]):
+                node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
+                node.query("CREATE TABLE IF NOT EXISTS ordinary.t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
+            break
+        except Exception as ex:
+            print("Got exception from node", smaller_exception(ex))
+            time.sleep(0.1)

     node2.query("INSERT INTO ordinary.t1 SELECT number FROM numbers(10)")

     node1.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
     node3.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)

-    assert node1.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
-    assert node2.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
-    assert node3.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
+    assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t1", "10")
+    assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t1", "10")
+    assert_eq_with_retry(node3,
"SELECT COUNT() FROM ordinary.t1", "10") with PartitionManager() as pm: pm.partition_instances(node2, node1) @@ -87,7 +105,7 @@ def test_blocade_leader(started_cluster): for i in range(100): try: - node2.query("SYSTEM RESTART REPLICA ordinary.t1") + restart_replica_for_sure(node2, "ordinary.t1", "/clickhouse/t1/replicas/2") node2.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)") break except Exception as ex: @@ -104,7 +122,7 @@ def test_blocade_leader(started_cluster): for i in range(100): try: - node3.query("SYSTEM RESTART REPLICA ordinary.t1") + restart_replica_for_sure(node3, "ordinary.t1", "/clickhouse/t1/replicas/3") node3.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)") break except Exception as ex: @@ -122,7 +140,7 @@ def test_blocade_leader(started_cluster): for n, node in enumerate([node1, node2, node3]): for i in range(100): try: - node.query("SYSTEM RESTART REPLICA ordinary.t1") + restart_replica_for_sure(node, "ordinary.t1", "/clickhouse/t1/replicas/{}".format(n + 1)) break except Exception as ex: try: @@ -150,7 +168,7 @@ def test_blocade_leader(started_cluster): for n, node in enumerate([node1, node2, node3]): for i in range(100): try: - node.query("SYSTEM RESTART REPLICA ordinary.t1") + restart_replica_for_sure(node, "ordinary.t1", "/clickhouse/t1/replicas/{}".format(n + 1)) node.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10) break except Exception as ex: @@ -170,9 +188,9 @@ def test_blocade_leader(started_cluster): for num, node in enumerate([node1, node2, node3]): dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1)) - assert node1.query("SELECT COUNT() FROM ordinary.t1") == "310\n" - assert node2.query("SELECT COUNT() FROM ordinary.t1") == "310\n" - assert node3.query("SELECT COUNT() FROM ordinary.t1") == "310\n" + assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t1", "310") + assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t1", "310") + assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t1", "310") def dump_zk(node, zk_path, replica_path): @@ -188,22 +206,47 @@ def dump_zk(node, zk_path, replica_path): print("Parts") print(node.query("SELECT name FROM system.zookeeper WHERE path = '{}/parts' FORMAT Vertical".format(replica_path))) +def restart_replica_for_sure(node, table_name, zk_replica_path): + fake_zk = None + try: + node.query("DETACH TABLE {}".format(table_name)) + fake_zk = get_fake_zk(node.name) + if fake_zk.exists(zk_replica_path + "/is_active") is not None: + fake_zk.delete(zk_replica_path + "/is_active") + + node.query("ATTACH TABLE {}".format(table_name)) + except Exception as ex: + print("Exception", ex) + raise ex + finally: + if fake_zk: + fake_zk.stop() + fake_zk.close() + + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader_twice(started_cluster): - wait_nodes() - for i, node in enumerate([node1, node2, node3]): - node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary") - node.query("CREATE TABLE ordinary.t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1)) + for i in range(100): + wait_nodes() + try: + for i, node in enumerate([node1, node2, node3]): + node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary") + node.query("CREATE TABLE IF NOT EXISTS ordinary.t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1)) + break + except Exception as ex: + print("Got 
exception from node", smaller_exception(ex)) + time.sleep(0.1) node2.query("INSERT INTO ordinary.t2 SELECT number FROM numbers(10)") node1.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) - assert node1.query("SELECT COUNT() FROM ordinary.t2") == "10\n" - assert node2.query("SELECT COUNT() FROM ordinary.t2") == "10\n" - assert node3.query("SELECT COUNT() FROM ordinary.t2") == "10\n" + assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t2", "10") + assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "10") + assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "10") with PartitionManager() as pm: pm.partition_instances(node2, node1) @@ -211,7 +254,7 @@ def test_blocade_leader_twice(started_cluster): for i in range(100): try: - node2.query("SYSTEM RESTART REPLICA ordinary.t2") + restart_replica_for_sure(node2, "ordinary.t2", "/clickhouse/t2/replicas/2") node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)") break except Exception as ex: @@ -228,7 +271,8 @@ def test_blocade_leader_twice(started_cluster): for i in range(100): try: - node3.query("SYSTEM RESTART REPLICA ordinary.t2") + restart_replica_for_sure(node3, "ordinary.t2", "/clickhouse/t2/replicas/3") + node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)") break except Exception as ex: @@ -243,6 +287,10 @@ def test_blocade_leader_twice(started_cluster): dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1)) assert False, "Cannot reconnect for node3" + node2.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) + + assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "210") + assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "210") # Total network partition pm.partition_instances(node3, node2) @@ -261,11 +309,10 @@ def test_blocade_leader_twice(started_cluster): except Exception as ex: time.sleep(0.5) - for n, node in enumerate([node1, node2, node3]): for i in range(100): try: - node.query("SYSTEM RESTART REPLICA ordinary.t2") + restart_replica_for_sure(node, "ordinary.t2", "/clickhouse/t2/replicas/{}".format(n + 1)) break except Exception as ex: try: @@ -293,29 +340,34 @@ def test_blocade_leader_twice(started_cluster): dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1)) assert False, "Cannot reconnect for node{}".format(n + 1) - for n, node in enumerate([node1, node2, node3]): for i in range(100): - try: - node.query("SYSTEM RESTART REPLICA ordinary.t2") - node.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) - break - except Exception as ex: + all_done = True + for n, node in enumerate([node1, node2, node3]): try: - node.query("ATTACH TABLE ordinary.t2") - except Exception as attach_ex: - print("Got exception node{}".format(n + 1), smaller_exception(attach_ex)) + restart_replica_for_sure(node, "ordinary.t2", "/clickhouse/t2/replicas/{}".format(n + 1)) + node.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10) + break + except Exception as ex: + all_done = False + try: + node.query("ATTACH TABLE ordinary.t2") + except Exception as attach_ex: + print("Got exception node{}".format(n + 1), smaller_exception(attach_ex)) - print("Got exception node{}".format(n + 1), smaller_exception(ex)) - time.sleep(0.5) + print("Got exception node{}".format(n + 1), smaller_exception(ex)) + time.sleep(0.5) + + if all_done: + break else: for num, node in enumerate([node1, node2, node3]): dump_zk(node, 
'/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1)) - assert False, "Cannot reconnect for node{}".format(n + 1) + assert False, "Cannot reconnect in {} retries".format(i) - assert node1.query("SELECT COUNT() FROM ordinary.t2") == "510\n" + assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t2", "510") if node2.query("SELECT COUNT() FROM ordinary.t2") != "510\n": for num, node in enumerate([node1, node2, node3]): dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1)) - assert node2.query("SELECT COUNT() FROM ordinary.t2") == "510\n" - assert node3.query("SELECT COUNT() FROM ordinary.t2") == "510\n" + assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "510") + assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "510") diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml index 4ad76889d1e..a47e5eae09a 100644 --- a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml +++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml @@ -2,6 +2,7 @@ 9181 1 + /var/lib/clickhouse/coordination/log 5000 diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml index a1954a1e639..18681f0dc95 100644 --- a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml +++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml @@ -2,6 +2,7 @@ 9181 2 + /var/lib/clickhouse/coordination/log 5000 diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml index 88d2358138f..184d3724219 100644 --- a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml +++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml @@ -2,6 +2,7 @@ 9181 3 + /var/lib/clickhouse/coordination/log 5000 diff --git a/tests/integration/test_testkeeper_multinode_simple/test.py b/tests/integration/test_testkeeper_multinode_simple/test.py index a7ece4bbd56..985915e10a1 100644 --- a/tests/integration/test_testkeeper_multinode_simple/test.py +++ b/tests/integration/test_testkeeper_multinode_simple/test.py @@ -6,6 +6,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True) @@ -234,6 +235,6 @@ def test_simple_replicated_table(started_cluster): node1.query("SYSTEM SYNC REPLICA t", timeout=10) node3.query("SYSTEM SYNC REPLICA t", timeout=10) - assert node1.query("SELECT COUNT() FROM t") == "10\n" - assert node2.query("SELECT COUNT() FROM t") == "10\n" - assert node3.query("SELECT COUNT() FROM t") == "10\n" + assert_eq_with_retry(node1, "SELECT COUNT() FROM t", "10") + assert_eq_with_retry(node2, "SELECT COUNT() FROM t", "10") + assert_eq_with_retry(node3, "SELECT COUNT() FROM t", "10") diff --git a/tests/integration/test_testkeeper_persistent_log/__init__.py b/tests/integration/test_testkeeper_persistent_log/__init__.py new file mode 100644 index 
00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_testkeeper_persistent_log/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_persistent_log/configs/enable_test_keeper.xml new file mode 100644 index 00000000000..a8b8991f959 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log/configs/enable_test_keeper.xml @@ -0,0 +1,21 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + + + 5000 + 10000 + trace + + + + + 1 + localhost + 44444 + + + + diff --git a/tests/integration/test_testkeeper_persistent_log/configs/logs_conf.xml b/tests/integration/test_testkeeper_persistent_log/configs/logs_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log/configs/logs_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_testkeeper_persistent_log/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_persistent_log/configs/use_test_keeper.xml new file mode 100644 index 00000000000..2e48e91bca5 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log/configs/use_test_keeper.xml @@ -0,0 +1,8 @@ + + + + node + 9181 + + + diff --git a/tests/integration/test_testkeeper_persistent_log/test.py b/tests/integration/test_testkeeper_persistent_log/test.py new file mode 100644 index 00000000000..71fee94088f --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log/test.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from kazoo.client import KazooClient, KazooState + + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True) + + +def random_string(length): + return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length)) + +def create_random_path(prefix="", depth=1): + if depth == 0: + return prefix + return create_random_path(os.path.join(prefix, random_string(3)), depth - 1) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def get_connection_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + def reset_listener(state): + nonlocal _fake_zk_instance + print("Fake zk callback called for state", state) + if state != KazooState.CONNECTED: + _fake_zk_instance._reset() + + _fake_zk_instance.add_listener(reset_listener) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_state_after_restart(started_cluster): + try: + node_zk = None + node_zk2 = None + node_zk = get_connection_zk("node") + + node_zk.create("/test_state_after_restart", b"somevalue") + strs = [] + for i in range(100): + strs.append(random_string(123).encode()) + node_zk.create("/test_state_after_restart/node" + str(i), strs[i]) + + for i in range(100): + if i % 7 == 0: + node_zk.delete("/test_state_after_restart/node" + str(i)) + + node.restart_clickhouse(kill=True) + + node_zk2 = get_connection_zk("node") + + assert 
node_zk2.get("/test_state_after_restart")[0] == b"somevalue" + for i in range(100): + if i % 7 == 0: + assert node_zk2.exists("/test_state_after_restart/node" + str(i)) is None + else: + assert len(node_zk2.get("/test_state_after_restart/node" + str(i))[0]) == 123 + assert node_zk2.get("/test_state_after_restart/node" + str(i))[0] == strs[i] + finally: + try: + if node_zk is not None: + node_zk.stop() + node_zk.close() + + if node_zk2 is not None: + node_zk2.stop() + node_zk2.close() + except: + pass + + +# http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html +def test_ephemeral_after_restart(started_cluster): + try: + node_zk = None + node_zk2 = None + node_zk = get_connection_zk("node") + + node_zk.create("/test_ephemeral_after_restart", b"somevalue") + strs = [] + for i in range(100): + strs.append(random_string(123).encode()) + node_zk.create("/test_ephemeral_after_restart/node" + str(i), strs[i], ephemeral=True) + + for i in range(100): + if i % 7 == 0: + node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) + + node.restart_clickhouse(kill=True) + + node_zk2 = get_connection_zk("node") + + assert node_zk2.get("/test_ephemeral_after_restart")[0] == b"somevalue" + for i in range(100): + if i % 7 == 0: + assert node_zk2.exists("/test_ephemeral_after_restart/node" + str(i)) is None + else: + assert len(node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0]) == 123 + assert node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0] == strs[i] + finally: + try: + if node_zk is not None: + node_zk.stop() + node_zk.close() + + if node_zk2 is not None: + node_zk2.stop() + node_zk2.close() + except: + pass diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/__init__.py b/tests/integration/test_testkeeper_persistent_log_multinode/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper1.xml new file mode 100644 index 00000000000..a47e5eae09a --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper1.xml @@ -0,0 +1,39 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper2.xml new file mode 100644 index 00000000000..18681f0dc95 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper2.xml @@ -0,0 +1,39 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper3.xml new file mode 100644 index 00000000000..184d3724219 --- /dev/null +++ 
b/tests/integration/test_testkeeper_persistent_log_multinode/configs/enable_test_keeper3.xml @@ -0,0 +1,39 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + 3 + node3 + 44444 + true + true + 1 + + + + diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/configs/log_conf.xml b/tests/integration/test_testkeeper_persistent_log_multinode/configs/log_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/configs/log_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_persistent_log_multinode/configs/use_test_keeper.xml new file mode 100644 index 00000000000..b6139005d2f --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/configs/use_test_keeper.xml @@ -0,0 +1,16 @@ + + + + node1 + 9181 + + + node2 + 9181 + + + node3 + 9181 + + + diff --git a/tests/integration/test_testkeeper_persistent_log_multinode/test.py b/tests/integration/test_testkeeper_persistent_log_multinode/test.py new file mode 100644 index 00000000000..cb9cf5a59d1 --- /dev/null +++ b/tests/integration/test_testkeeper_persistent_log_multinode/test.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True) + +from kazoo.client import KazooClient, KazooState + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + def reset_listener(state): + nonlocal _fake_zk_instance + print("Fake zk callback called for state", state) + if state != KazooState.CONNECTED: + _fake_zk_instance._reset() + + _fake_zk_instance.add_listener(reset_listener) + _fake_zk_instance.start() + return _fake_zk_instance + +def stop_zk(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + +def test_restart_multinode(started_cluster): + try: + node1_zk = node2_zk = node3_zk = None + + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + node3_zk = get_fake_zk("node3") + + for i in range(100): + node1_zk.create("/test_read_write_multinode_node" + str(i), ("somedata" + str(i)).encode()) + + for i in range(100): + if i % 10 == 0: + node1_zk.delete("/test_read_write_multinode_node" + str(i)) + + node2_zk.sync("/test_read_write_multinode_node0") + node3_zk.sync("/test_read_write_multinode_node0") + + for i in range(100): + if i % 10 != 0: + assert 
node2_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode() + assert node3_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode() + else: + assert node2_zk.exists("/test_read_write_multinode_node" + str(i)) is None + assert node3_zk.exists("/test_read_write_multinode_node" + str(i)) is None + + finally: + for zk in [node1_zk, node2_zk, node3_zk]: + stop_zk(zk) + + node1.restart_clickhouse(kill=True) + node2.restart_clickhouse(kill=True) + node3.restart_clickhouse(kill=True) + for i in range(100): + try: + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + node3_zk = get_fake_zk("node3") + for i in range(100): + if i % 10 != 0: + assert node1_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode() + assert node2_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode() + assert node3_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode() + else: + assert node1_zk.exists("/test_read_write_multinode_node" + str(i)) is None + assert node2_zk.exists("/test_read_write_multinode_node" + str(i)) is None + assert node3_zk.exists("/test_read_write_multinode_node" + str(i)) is None + break + except Exception as ex: + print("Got exception as ex", ex) + finally: + for zk in [node1_zk, node2_zk, node3_zk]: + stop_zk(zk) diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index eb5ab2da98f..80875da45e0 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -129,7 +129,7 @@ def test_secure_connection(): # We need absolute path in zookeeper volumes. Generate it dynamically. 
TEMPLATE = ''' zoo{zoo_id}: - image: zookeeper:3.5.6 + image: zookeeper:3.6.2 restart: always environment: ZOO_TICK_TIME: 500 diff --git a/tests/performance/modulo.xml b/tests/performance/modulo.xml index 77b544ff389..7c33855ff32 100644 --- a/tests/performance/modulo.xml +++ b/tests/performance/modulo.xml @@ -1,7 +1,4 @@ - - - SELECT number % 128 FROM numbers(300000000) FORMAT Null SELECT number % 255 FROM numbers(300000000) FORMAT Null SELECT number % 256 FROM numbers(300000000) FORMAT Null diff --git a/tests/performance/prewhere_with_row_level_filter.xml b/tests/performance/prewhere_with_row_level_filter.xml new file mode 100644 index 00000000000..b7b41c0a30f --- /dev/null +++ b/tests/performance/prewhere_with_row_level_filter.xml @@ -0,0 +1,17 @@ + + DROP TABLE IF EXISTS test_prl; + CREATE TABLE test_prl (n UInt64) ENGINE MergeTree ORDER BY n; + GRANT CREATE ROW POLICY ON *.* TO CURRENT_USER + CREATE ROW POLICY OR REPLACE test_prl_policy ON test_prl AS PERMISSIVE FOR SELECT USING n % 7 TO ALL; + + INSERT INTO test_prl SELECT number FROM numbers(50000000); + + SELECT * FROM test_prl; + SELECT * FROM test_prl WHERE n % 3 AND n % 5 SETTINGS optimize_move_to_prewhere = 0; + SELECT * FROM test_prl PREWHERE n % 3 AND n % 5; + SELECT * FROM test_prl PREWHERE n % 3 WHERE n % 5; + SELECT * FROM test_prl PREWHERE n % 5 WHERE n % 3; + + DROP ROW POLICY IF EXISTS test_prl_policy ON test_prl; + DROP TABLE IF EXISTS test_prl; + diff --git a/tests/performance/subqueries.xml b/tests/performance/subqueries.xml new file mode 100644 index 00000000000..0d41099841b --- /dev/null +++ b/tests/performance/subqueries.xml @@ -0,0 +1,7 @@ + + create table tab (a UInt32, b UInt32) engine = MergeTree order by (a, b) + insert into tab values (1, 1) + select a, b from tab where (a, b) in (select toUInt32(number) as x, toUInt32(sleep(0.1) + 1) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 + select a, b from tab where (1, 1) = (select min(toUInt32(number + 1)) as x, min(toUInt32(sleep(0.1) + 1)) from numbers_mt(16)) settings max_threads = 2, max_block_size = 4 + DROP TABLE tab + diff --git a/tests/queries/0_stateless/00205_scalar_subqueries.sql b/tests/queries/0_stateless/00205_scalar_subqueries.sql index 14244377e5f..03bcd0a3ebc 100644 --- a/tests/queries/0_stateless/00205_scalar_subqueries.sql +++ b/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -7,3 +7,4 @@ SELECT (SELECT toDate('2015-01-02'), 'Hello'); SELECT (SELECT toDate('2015-01-02'), 'Hello') AS x, x, identity((SELECT 1)), identity((SELECT 1) AS y); -- SELECT (SELECT uniqState('')); + SELECT ( SELECT throwIf(1 + dummy) ); -- { serverError 395 } diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh index 903217ca939..762f8a937e4 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh @@ -1,18 +1,19 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. 
-$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo -$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH" +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE array_3dim_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh index 0b386723091..243446f9438 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh @@ -3,18 +3,21 @@ # https://github.com/ClickHouse/ClickHouse/issues/9069 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo -$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH" +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE array_of_arrays_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh index cbb387a62a5..300f82e5ca2 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh @@ -3,13 +3,14 @@ # https://github.com/ClickHouse/ClickHouse/issues/7438 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo -$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH" +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE enum_mapping_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.insh b/tests/queries/0_stateless/00825_protobuf_format_input.insh deleted file mode 100644 index 39a2f17c98f..00000000000 --- a/tests/queries/0_stateless/00825_protobuf_format_input.insh +++ /dev/null @@ -1,5 +0,0 @@ -echo -ne '\xf3\x01\x0a\x24\x61\x37\x35\x32\x32\x31\x35\x38\x2d\x33\x64\x34\x31\x2d\x34\x62\x37\x37\x2d\x61\x64\x36\x39\x2d\x36\x63\x35\x39\x38\x65\x65\x35\x35\x63\x34\x39\x12\x04\x49\x76\x61\x6e\x1a\x06\x50\x65\x74\x72\x6f\x76\x20\x01\x28\xaf\x1f\x32\x03\x70\x6e\x67\x3a\x0c\x2b\x37\x34\x39\x35\x31\x32\x33\x34\x35\x36\x37\x40\x01\x4d\xfc\xd0\x30\x5c\x50\x26\x58\x09\x62\x09\x59\x65\x73\x74\x65\x72\x64\x61\x79\x62\x07\x46\x6c\x6f\x77\x65\x72\x73\x6a\x04\xff\x01\x00\x00\x72\x06\x4d\x6f\x73\x63\x6f\x77\x7a\x08\x4b\x03\x5f\x42\x72\x7d\x16\x42\x81\x01\x1f\x85\xeb\x51\xb8\x1e\x09\x40\x89\x01\x33\x33\x33\x33\x33\xc3\x6a\x40\x95\x01\xcd\xcc\xcc\x3d\x9d\x01\x9a\x99\xb9\x40\xa0\x01\x80\xc4\xd7\x8d\x7f\xaa\x01\x0c\x0a\x05\x6d\x65\x74\x65\x72\x15\x00\x00\x80\x3f\xaa\x01\x11\x0a\x0a\x63\x65\x6e\x74\x69\x6d\x65\x74\x65\x72\x15\x0a\xd7\x23\x3c\xaa\x01\x10\x0a\x09\x6b\x69\x6c\x6f\x6d\x65\x74\x65\x72\x15\x00\x00\x7a\x44\xb2\x01\x10\x0a\x0e\xa2\x06\x0b\x0a\x09\x08\xf4\x03\x12\x04\xf5\x03\xf6\x03\x7e\x0a\x24\x63\x36\x39\x34\x61\x64\x38\x61\x2d\x66\x37\x31\x34\x2d\x34\x65\x61\x33\x2d\x39\x30\x37\x64\x2d\x66\x64\x35\x34\x66\x62\x32\x35\x64\x39\x62\x35\x12\x07\x4e\x61\x74\x61\x6c\x69\x61\x1a\x08\x53\x6f\x6b\x6f\x6c\x6f\x76\x61\x28\xa6\x3f\x32\x03\x6a\x70\x67\x50\x1a\x58\x0b\x6a\x04\x64\xc8\x01\x32\x72\x08\x50\x6c\x79\x6d\x6f\x75\x74\x68\x7a\x08\x6a\x9d\x49\x42\x46\x8c\x84\xc0\x81\x01\x6e\x86\x1b\xf0\xf9\x21\x09\x40\x95\x01\x42\x60\xe5\x3b\x9d\x01\xcd\xcc\xac\x40\xa0\x01\xff\xff\xa9\xce\x93\x8c\x09\xc0\x01\x0a\x24\x61\x37\x64\x61\x31\x61\x61\x36\x2d\x66\x34\x32\x35\x2d\x34\x37\x38\x39\x2d\x38\x39\x34\x37\x2d\x62\x30\x33\x34\x37\x38\x36\x65\x64\x33\x37\x34\x12\x06\x56\x61\x73\x69\x6c\x79\x1a\x07\x53\x69\x64\x6f\x72\x6f\x76\x20\x01\x28\xfb\x48\x32\x03\x62\x6d\x70\x3a\x0d\x2b\x34\x34\x32\x30\x31\x32\x33\x34\x35\x36\x37\x38\x40\x01\x4d\x50\xe0\x27\x5c\x50\x17\x58\x04\x62\x05\x53\x75\x6e\x6e\x79\x6a\x05\xfa\x01\xf4\x01\x0a\x72\x08\x4d\x75\x72\x6d\x61\x6e\x73\x6b\x7a\x08\xfd\xf0\x89\x42\xc8\x4c\x04\x42\x81\x01\x11\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x37\x42\x95\x01\x00\x00\x48\x44\x9d\x01\xcd\xcc\x4c\xc0\xa0\x01\x80\xd4\x9f\x93\x01\xaa\x01\x0c\x0a\x05\x70\x6f\x75\x6e\x64\x15\x00\x00\x80\x41\xb2\x01\x0a\x0a\x08\xa2\x06\x05\x0a\x03\x08\xf7\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" -echo -ne 
'\xb3\x01\x12\x05\x46\x72\x69\x64\x61\x28\x99\xe1\xf3\xd1\x0b\x52\x08\x45\x72\x6d\x61\x6b\x6f\x76\x61\x72\x0c\x00\x00\xdc\x42\x00\x00\x52\x43\x00\x00\x94\x42\x79\x48\xce\x3d\x51\x00\x00\x00\x00\xc8\x02\x14\xc2\x05\x08\x00\x00\x80\x44\x00\x00\x80\x49\x9a\x06\x02\x4b\x42\x9a\x06\x02\x4d\x42\xa1\x06\x00\x00\x00\x00\x00\x00\xe0\x3f\xa8\x06\x2a\xa8\x06\xa8\xff\xff\xff\xff\xff\xff\xff\xff\x01\xb0\x06\x01\xbd\x06\x25\x06\x49\x40\xfa\x06\x02\x34\x30\x90\x08\xe2\x08\xe1\x08\x89\xe6\x6e\xdd\x01\x00\x00\x00\xb0\x09\xc3\x19\xd0\x0c\xb7\x02\xe2\x12\x24\x32\x30\x66\x63\x64\x39\x35\x61\x2d\x33\x33\x32\x64\x2d\x34\x31\x64\x62\x2d\x61\x39\x65\x63\x2d\x31\x36\x31\x66\x36\x34\x34\x64\x30\x35\x39\x63\xa0\x38\xbc\x05\xaa\x38\x02\xbd\x05\xb4\x01\x08\x01\x12\x06\x49\x73\x6f\x6c\x64\x65\x52\x07\x4c\x61\x76\x72\x6f\x76\x61\x72\x0c\x00\x00\x7f\x43\x00\x00\x00\x00\x00\x00\x7f\x43\xaa\x01\x03\x61\x62\x63\xc8\x02\x32\xc2\x05\x08\x00\x00\x00\x41\x00\x00\x80\x3f\x9a\x06\x04\x42\x79\x74\x65\x9a\x06\x03\x42\x69\x74\xa1\x06\x00\x00\x00\x00\x00\x00\x12\x40\xa8\x06\x1a\xa8\x06\xb0\xff\xff\xff\xff\xff\xff\xff\xff\x01\xb0\x06\x01\xbd\x06\xf9\x0f\x49\x40\xc2\x06\x01\x2c\xfa\x06\x02\x33\x32\x90\x08\x78\xe1\x08\x39\x4e\x2b\xfe\xe4\xf5\xff\xff\xb0\x09\xe8\x30\xd8\x12\x01\xe2\x12\x24\x37\x63\x66\x61\x36\x38\x35\x36\x2d\x61\x35\x34\x61\x2d\x34\x37\x38\x36\x2d\x62\x38\x65\x35\x2d\x37\x34\x35\x31\x35\x39\x64\x35\x32\x32\x37\x38\xa0\x38\xbe\x05\xc2\x3e\x05\x15\x00\x00\xb6\x42' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson'" -echo -ne '\xa5\x02\x0a\x24\x61\x61\x30\x65\x35\x61\x30\x36\x2d\x63\x61\x62\x32\x2d\x34\x30\x33\x34\x2d\x61\x36\x61\x32\x2d\x34\x38\x65\x38\x32\x62\x39\x31\x36\x36\x34\x65\x12\x06\x4c\x65\x6f\x6e\x69\x64\x1a\x08\x4b\x69\x72\x69\x6c\x6c\x6f\x76\x22\x04\x6d\x61\x6c\x65\x2a\x0a\x31\x39\x38\x33\x2d\x30\x36\x2d\x32\x34\x3a\x0c\x2b\x37\x34\x39\x35\x30\x32\x37\x35\x38\x36\x34\x42\x01\x31\x4a\x13\x32\x30\x31\x39\x2d\x30\x32\x2d\x30\x34\x20\x30\x39\x3a\x34\x35\x3a\x30\x30\x52\x02\x33\x35\x5a\x06\x63\x61\x6e\x63\x65\x72\x62\x07\x37\x20\x72\x69\x6e\x67\x73\x62\x08\x45\x61\x73\x74\x73\x69\x64\x65\x62\x0b\x4c\x61\x73\x74\x20\x48\x75\x72\x72\x61\x68\x6a\x01\x30\x6a\x01\x30\x6a\x03\x32\x35\x35\x72\x09\x53\x61\x6e\x20\x44\x69\x65\x67\x6f\x7a\x09\x33\x32\x2e\x38\x32\x33\x39\x34\x33\x7a\x0b\x2d\x31\x31\x37\x2e\x30\x38\x31\x33\x32\x37\x82\x01\x09\x33\x2e\x31\x34\x31\x35\x39\x32\x37\x8a\x01\x08\x31\x35\x30\x30\x30\x30\x30\x30\x92\x01\x06\x31\x38\x36\x2e\x37\x35\x9a\x01\x04\x2d\x32\x2e\x31\xa2\x01\x0b\x32\x30\x36\x35\x39\x38\x32\x39\x33\x33\x31\xaa\x01\x18\x0a\x06\x6d\x69\x6e\x75\x74\x65\x0a\x04\x68\x6f\x75\x72\x12\x02\x36\x30\x12\x04\x33\x36\x30\x30\xb2\x01\x08\x0a\x06\x12\x04\x31\x38\x30\x30' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson'" -echo -ne 
'\xdd\x01\x0a\x24\x33\x66\x61\x65\x65\x30\x36\x34\x2d\x63\x34\x66\x37\x2d\x34\x64\x33\x34\x2d\x62\x36\x66\x33\x2d\x38\x64\x38\x31\x63\x32\x62\x36\x61\x31\x35\x64\x12\x04\x4e\x69\x63\x6b\x1a\x0a\x4b\x6f\x6c\x65\x73\x6e\x69\x6b\x6f\x76\x20\x01\x28\xda\x52\x32\x03\x62\x6d\x70\x3a\x0c\x34\x31\x32\x2d\x36\x38\x37\x2d\x35\x30\x30\x37\x40\x01\x4d\x2f\x27\xf2\x5b\x50\x14\x58\x09\x62\x06\x48\x61\x76\x61\x6e\x61\x68\x80\x01\x68\x00\x68\x80\x01\x72\x0a\x50\x69\x74\x74\x73\x62\x75\x72\x67\x68\x7a\x08\x9b\x11\x22\x42\x1f\xe6\x9f\xc2\x81\x01\x28\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x27\x42\x95\x01\x00\x00\x43\x44\x9d\x01\x66\x66\x92\x41\xa0\x01\xce\xdf\xb8\xba\x01\xab\x01\x0d\xcd\xcc\xe2\x41\x0d\xcd\xcc\x4c\x3e\x0d\x00\x00\x80\x3f\x12\x05\x6f\x75\x6e\x63\x65\x12\x05\x63\x61\x72\x61\x74\x12\x04\x67\x72\x61\x6d\xac\x01\xb3\x01\x0b\xa2\x06\x05\x0b\x08\x96\x4a\x0c\x0c\xb4\x01' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'" -echo -ne '\x04\x08\x02\x10\x04\x00\x04\x08\x03\x10\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'" diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.reference b/tests/queries/0_stateless/00825_protobuf_format_input.reference deleted file mode 100644 index 75fef3f8ac3..00000000000 --- a/tests/queries/0_stateless/00825_protobuf_format_input.reference +++ /dev/null @@ -1,18 +0,0 @@ -a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] -c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] -aa0e5a06-cab2-4034-a6a2-48e82b91664e Leonid Kirillov male 1983-06-24 \N +74950275864\0 1 2019-02-04 09:45:00 35 cancer ['7 rings','Eastside','Last Hurrah'] [0,0,255] San Diego [32.823943,-117.081327] 3.1415927 15000000.00 186.75 -2.1 20659829331 ['minute','hour'] [60,3600] \N [1800] -20fcd95a-332d-41db-a9ec-161f644d059c Frida Ermakova female 1978-12-12 \N 3124555929\0\0\0 0 2013-03-11 16:30:00 40 sagittarius [] [110,210,74] [42.000000,-88.000000] 3.1410000324249268 311.00 0.5 10.0 8010000009 ['KB','MB'] [1024,1048576] 700 [701] -a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] -3faee064-c4f7-4d34-b6f3-8d81c2b6a15d Nick Kolesnikov male 1998-12-26 bmp 412-687-5007\0 1 2018-11-19 05:59:59 20 capricorn ['Havana'] [128,0,128] Pittsburgh [40.517192,-79.949456] 3.1415926535898 50000000000.00 780 18.3 195500007 ['ounce','carat','gram'] [28.35,0.2,1] 9494 [] -7cfa6856-a54a-4786-b8e5-745159d52278 Isolde Lavrova female 1987-02-09 \N \N 1 \N 32 aquarius [] [255,0,255] [26.000000,-80.000000] 3.1415998935699463 \N 4.5 25.0 -11111111111111 ['Byte','Bit'] [8,1] 702 [] -0 0 -2 4 -3 9 -a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] 
-c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] -a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] -3faee064-c4f7-4d34-b6f3-8d81c2b6a15d Nick Kolesnikov male 1998-12-26 bmp 412-687-5007\0 1 2018-11-19 05:59:59 20 capricorn ['Havana'] [128,0,128] Pittsburgh [40.517192,-79.949456] 3.1415926535898 50000000000.00 780 18.3 195500007 ['ounce','carat','gram'] [28.35,0.2,1] 9494 [] -2 4 -3 9 -ok -ok diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.sh b/tests/queries/0_stateless/00825_protobuf_format_input.sh deleted file mode 100755 index 5a85a852cb1..00000000000 --- a/tests/queries/0_stateless/00825_protobuf_format_input.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -set -eo pipefail - -# Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' -DROP TABLE IF EXISTS in_persons_00825; -DROP TABLE IF EXISTS in_squares_00825; - -CREATE TABLE in_persons_00825 (uuid UUID, - name String, - surname String, - gender Enum8('male'=1, 'female'=0), - birthDate Date, - photo Nullable(String), - phoneNumber Nullable(FixedString(13)), - isOnline UInt8, - visitTime Nullable(DateTime), - age UInt8, - zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823, - 'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 'capricorn'=1222, 'aquarius'=120, - 'pisces'=219), - songs Array(String), - color Array(UInt8), - hometown LowCardinality(String), - location Array(Decimal32(6)), - pi Nullable(Float64), - lotteryWin Nullable(Decimal64(2)), - someRatio Float32, - temperature Decimal32(1), - randomBigNumber Int64, - measureUnits Nested (unit String, coef Float32), - nestiness_a_b_c_d Nullable(UInt32), - `nestiness_a_B.c_E` Array(UInt32) - ) ENGINE = MergeTree ORDER BY tuple(); - -CREATE TABLE in_squares_00825 (number UInt32, square UInt32) ENGINE = MergeTree ORDER BY tuple(); -EOF - -# To generate the file 00825_protobuf_format_input.insh use the following commands: -# ninja ProtobufDelimitedMessagesSerializer -# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer -# shellcheck source=./00825_protobuf_format_input.insh -source "$CURDIR"/00825_protobuf_format_input.insh - -$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;" -$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;" - -$CLICKHOUSE_CLIENT --query "TRUNCATE TABLE in_persons_00825;" -$CLICKHOUSE_CLIENT --query "TRUNCATE TABLE in_squares_00825;" - -# shellcheck source=./00825_protobuf_format_input_single.insh -source "$CURDIR"/00825_protobuf_format_input_single.insh - -$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;" -$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;" - -# Try to input malformed data. 
-set +eo pipefail -echo -ne '\xe0\x80\x3f\x0b' \ - | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \ - | grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail" -set -eo pipefail - -# Try to input malformed data for ProtobufSingle -set +eo pipefail -echo -ne '\xff\xff\x3f\x0b' \ - | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \ - | grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail" -set -eo pipefail - -$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;" -$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;" diff --git a/tests/queries/0_stateless/00825_protobuf_format_input_single.insh b/tests/queries/0_stateless/00825_protobuf_format_input_single.insh deleted file mode 100644 index 6c4dfec05aa..00000000000 --- a/tests/queries/0_stateless/00825_protobuf_format_input_single.insh +++ /dev/null @@ -1,12 +0,0 @@ -echo -ne '\x0a\x24\x61\x37\x35\x32\x32\x31\x35\x38\x2d\x33\x64\x34\x31\x2d\x34\x62\x37\x37\x2d\x61\x64\x36\x39\x2d\x36\x63\x35\x39\x38\x65\x65\x35\x35\x63\x34\x39\x12\x04\x49\x76\x61\x6e\x1a\x06\x50\x65\x74\x72\x6f\x76\x20\x01\x28\xaf\x1f\x32\x03\x70\x6e\x67\x3a\x0c\x2b\x37\x34\x39\x35\x31\x32\x33\x34\x35\x36\x37\x40\x01\x4d\xfc\xd0\x30\x5c\x50\x26\x58\x09\x62\x09\x59\x65\x73\x74\x65\x72\x64\x61\x79\x62\x07\x46\x6c\x6f\x77\x65\x72\x73\x6a\x04\xff\x01\x00\x00\x72\x06\x4d\x6f\x73\x63\x6f\x77\x7a\x08\x4b\x03\x5f\x42\x72\x7d\x16\x42\x81\x01\x1f\x85\xeb\x51\xb8\x1e\x09\x40\x89\x01\x33\x33\x33\x33\x33\xc3\x6a\x40\x95\x01\xcd\xcc\xcc\x3d\x9d\x01\x9a\x99\xb9\x40\xa0\x01\x80\xc4\xd7\x8d\x7f\xaa\x01\x0c\x0a\x05\x6d\x65\x74\x65\x72\x15\x00\x00\x80\x3f\xaa\x01\x11\x0a\x0a\x63\x65\x6e\x74\x69\x6d\x65\x74\x65\x72\x15\x0a\xd7\x23\x3c\xaa\x01\x10\x0a\x09\x6b\x69\x6c\x6f\x6d\x65\x74\x65\x72\x15\x00\x00\x7a\x44\xb2\x01\x10\x0a\x0e\xa2\x06\x0b\x0a\x09\x08\xf4\x03\x12\x04\xf5\x03\xf6\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" -echo -ne '\x0a\x24\x63\x36\x39\x34\x61\x64\x38\x61\x2d\x66\x37\x31\x34\x2d\x34\x65\x61\x33\x2d\x39\x30\x37\x64\x2d\x66\x64\x35\x34\x66\x62\x32\x35\x64\x39\x62\x35\x12\x07\x4e\x61\x74\x61\x6c\x69\x61\x1a\x08\x53\x6f\x6b\x6f\x6c\x6f\x76\x61\x28\xa6\x3f\x32\x03\x6a\x70\x67\x50\x1a\x58\x0b\x6a\x04\x64\xc8\x01\x32\x72\x08\x50\x6c\x79\x6d\x6f\x75\x74\x68\x7a\x08\x6a\x9d\x49\x42\x46\x8c\x84\xc0\x81\x01\x6e\x86\x1b\xf0\xf9\x21\x09\x40\x95\x01\x42\x60\xe5\x3b\x9d\x01\xcd\xcc\xac\x40\xa0\x01\xff\xff\xa9\xce\x93\x8c\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" -echo -ne 
'\x0a\x24\x61\x37\x64\x61\x31\x61\x61\x36\x2d\x66\x34\x32\x35\x2d\x34\x37\x38\x39\x2d\x38\x39\x34\x37\x2d\x62\x30\x33\x34\x37\x38\x36\x65\x64\x33\x37\x34\x12\x06\x56\x61\x73\x69\x6c\x79\x1a\x07\x53\x69\x64\x6f\x72\x6f\x76\x20\x01\x28\xfb\x48\x32\x03\x62\x6d\x70\x3a\x0d\x2b\x34\x34\x32\x30\x31\x32\x33\x34\x35\x36\x37\x38\x40\x01\x4d\x50\xe0\x27\x5c\x50\x17\x58\x04\x62\x05\x53\x75\x6e\x6e\x79\x6a\x05\xfa\x01\xf4\x01\x0a\x72\x08\x4d\x75\x72\x6d\x61\x6e\x73\x6b\x7a\x08\xfd\xf0\x89\x42\xc8\x4c\x04\x42\x81\x01\x11\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x37\x42\x95\x01\x00\x00\x48\x44\x9d\x01\xcd\xcc\x4c\xc0\xa0\x01\x80\xd4\x9f\x93\x01\xaa\x01\x0c\x0a\x05\x70\x6f\x75\x6e\x64\x15\x00\x00\x80\x41\xb2\x01\x0a\x0a\x08\xa2\x06\x05\x0a\x03\x08\xf7\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" -echo -ne '\x0a\x24\x33\x66\x61\x65\x65\x30\x36\x34\x2d\x63\x34\x66\x37\x2d\x34\x64\x33\x34\x2d\x62\x36\x66\x33\x2d\x38\x64\x38\x31\x63\x32\x62\x36\x61\x31\x35\x64\x12\x04\x4e\x69\x63\x6b\x1a\x0a\x4b\x6f\x6c\x65\x73\x6e\x69\x6b\x6f\x76\x20\x01\x28\xda\x52\x32\x03\x62\x6d\x70\x3a\x0c\x34\x31\x32\x2d\x36\x38\x37\x2d\x35\x30\x30\x37\x40\x01\x4d\x2f\x27\xf2\x5b\x50\x14\x58\x09\x62\x06\x48\x61\x76\x61\x6e\x61\x68\x80\x01\x68\x00\x68\x80\x01\x72\x0a\x50\x69\x74\x74\x73\x62\x75\x72\x67\x68\x7a\x08\x9b\x11\x22\x42\x1f\xe6\x9f\xc2\x81\x01\x28\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x27\x42\x95\x01\x00\x00\x43\x44\x9d\x01\x66\x66\x92\x41\xa0\x01\xce\xdf\xb8\xba\x01\xab\x01\x0d\xcd\xcc\xe2\x41\x0d\xcd\xcc\x4c\x3e\x0d\x00\x00\x80\x3f\x12\x05\x6f\x75\x6e\x63\x65\x12\x05\x63\x61\x72\x61\x74\x12\x04\x67\x72\x61\x6d\xac\x01\xb3\x01\x0b\xa2\x06\x05\x0b\x08\x96\x4a\x0c\x0c\xb4\x01' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'" - -echo -ne '\x08\x02\x10\x04' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'" -echo -ne '\x08\x03\x10\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'" - -### Actually empty Protobuf message is a valid message (with all values default). -### It will work in Kafka but clickhouse-client forbids that: -### Code: 108. DB::Exception: No data to insert -## echo -ne '' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'" \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh index 5df25c41750..877fe73603e 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_map.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh @@ -1,31 +1,30 @@ #!/usr/bin/env bash -# https://github.com/ClickHouse/ClickHouse/issues/6497 - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. 
-$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo @@ -34,7 +33,8 @@ hexdump -C $BINARY_FILE_PATH # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH" -$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO map_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_protobuf_00825" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE map_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.reference b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.reference new file mode 100644 index 00000000000..08fb4696506 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.reference @@ -0,0 +1,52 @@ +[[(1),(2)],[(3),(4),(5)]] +[[(6)]] +[[]] +[] + +Binary representation: +00000000 18 0a 08 12 02 18 01 12 02 18 02 0a 0c 12 02 18 |................| +00000010 03 12 02 18 04 12 02 18 05 06 0a 04 12 02 18 06 |................| +00000020 02 0a 00 00 |....| +00000024 + +MESSAGE #1 AT 0x00000001 +x { + y { + z: 1 + } + y { + z: 2 + } +} +x { + y { + z: 3 + } + y { + z: 4 + } + y { + z: 5 + } +} +MESSAGE #2 AT 0x0000001A +x { + y { + z: 6 + } +} +MESSAGE #3 AT 0x00000021 +x { +} +MESSAGE #4 AT 0x00000024 + +Binary representation is as expected + +[[(1),(2)],[(3),(4),(5)]] +[[(6)]] +[[]] +[] +[[(1),(2)],[(3),(4),(5)]] +[[(6)]] +[[]] +[] diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh new file mode 100755 index 00000000000..9d382f8c589 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/11117 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). 
+echo +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_in_nested_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_in_nested_protobuf_00825" + +rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE nested_in_nested_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh index 58ded92f2c1..b33db75b5c9 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh @@ -3,13 +3,14 @@ # https://github.com/ClickHouse/ClickHouse/issues/6497 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo -$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH" +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE nested_optional_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.reference b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.reference new file mode 100644 index 00000000000..ba84d41eb51 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.reference @@ -0,0 +1,13 @@ +1000 1K +2000 2K +3000 3K + +Binary representation: +00000000 08 e8 07 12 02 31 4b |.....1K| +00000007 + +x: 1000 +str: "1K" + +Roundtrip: +1000 1K diff --git a/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh new file mode 100755 index 00000000000..b95d35e8256 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# Check the output in the ProtobufSingle format +echo +echo "Binary representation:" +hexdump -C $BINARY_FILE_PATH + +echo +(cd $SCHEMADIR && protoc --decode Message 00825_protobuf_format_no_length_delimiter.proto) < $BINARY_FILE_PATH + +# Check the input in the ProtobufSingle format. +echo +echo "Roundtrip:" +$CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_no_length_delimiter_protobuf_00825 AS no_length_delimiter_protobuf_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_no_length_delimiter_protobuf_00825 FORMAT ProtobufSingle SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_no_length_delimiter_protobuf_00825" +rm "$BINARY_FILE_PATH" + +# The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter. +$CLICKHOUSE_CLIENT --multiquery --testmode > /dev/null <'; -SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson'; -SELECT 'STRINGS->'; -SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson'; -SELECT 'SYNTAX2->'; -SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'; -SELECT 'SQUARES->'; -SELECT * FROM out_squares_00825 ORDER BY number FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'; - -SELECT '\n\n** ProtobufSingle **\n\n'; - -SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'; -SELECT 'ALTERNATIVE->'; -SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson'; -SELECT 'STRINGS->'; -SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson'; -SELECT 'SYNTAX2->'; -SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'; -SELECT 'SQUARES->'; -SELECT * FROM out_squares_00825 ORDER BY number LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'; - --- Code: 546, e.displayText() = DB::Exception: The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter. 
-SELECT * FROM out_persons_00825 ORDER BY name FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'; -- { clientError 546 } - -DROP TABLE IF EXISTS out_persons_00825; -DROP TABLE IF EXISTS out_squares_00825; -EOF diff --git a/tests/queries/0_stateless/00825_protobuf_format_persons.reference b/tests/queries/0_stateless/00825_protobuf_format_persons.reference new file mode 100644 index 00000000000..711980b3592 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_persons.reference @@ -0,0 +1,569 @@ +a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753215,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] +c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] +a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970682,33.074981] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] + +Schema 00825_protobuf_format_persons:Person + +Binary representation: +00000000 f4 01 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4| +00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59| +00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..| +00000030 50 65 74 72 6f 76 20 01 28 af 1f 32 03 70 6e 67 |Petrov .(..2.png| +00000040 3a 0d 2b 37 34 39 35 31 32 33 34 35 36 37 00 40 |:.+74951234567.@| +00000050 01 4d fc d0 30 5c 50 26 58 09 62 09 59 65 73 74 |.M..0\P&X.b.Yest| +00000060 65 72 64 61 79 62 07 46 6c 6f 77 65 72 73 6a 04 |erdayb.Flowersj.| +00000070 ff 01 00 00 72 06 4d 6f 73 63 6f 77 7a 08 4b 03 |....r.Moscowz.K.| +00000080 5f 42 72 7d 16 42 81 01 1f 85 eb 51 b8 1e 09 40 |_Br}.B.....Q...@| +00000090 89 01 33 33 33 33 33 c3 6a 40 95 01 cd cc cc 3d |..33333.j@.....=| +000000a0 9d 01 9a 99 b9 40 a0 01 80 c4 d7 8d 7f aa 01 0c |.....@..........| +000000b0 0a 05 6d 65 74 65 72 15 00 00 80 3f aa 01 11 0a |..meter....?....| +000000c0 0a 63 65 6e 74 69 6d 65 74 65 72 15 0a d7 23 3c |.centimeter...#<| +000000d0 aa 01 10 0a 09 6b 69 6c 6f 6d 65 74 65 72 15 00 |.....kilometer..| +000000e0 00 7a 44 b2 01 10 0a 0e a2 06 0b 0a 09 08 f4 03 |.zD.............| +000000f0 12 04 f5 03 f6 03 7e 0a 24 63 36 39 34 61 64 38 |......~.$c694ad8| +00000100 61 2d 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 |a-f714-4ea3-907d| +00000110 2d 66 64 35 34 66 62 32 35 64 39 62 35 12 07 4e |-fd54fb25d9b5..N| +00000120 61 74 61 6c 69 61 1a 08 53 6f 6b 6f 6c 6f 76 61 |atalia..Sokolova| +00000130 28 a6 3f 32 03 6a 70 67 50 1a 58 0b 6a 04 64 c8 |(.?2.jpgP.X.j.d.| +00000140 01 32 72 08 50 6c 79 6d 6f 75 74 68 7a 08 6a 9d |.2r.Plymouthz.j.| +00000150 49 42 46 8c 84 c0 81 01 6e 86 1b f0 f9 21 09 40 |IBF.....n....!.@| +00000160 95 01 42 60 e5 3b 9d 01 cd cc ac 40 a0 01 ff ff |..B`.;.....@....| +00000170 a9 ce 93 8c 09 c0 01 0a 24 61 37 64 61 31 61 61 |........$a7da1aa| +00000180 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 39 34 37 |6-f425-4789-8947| +00000190 2d 62 30 33 34 37 38 36 65 64 33 37 34 12 06 56 |-b034786ed374..V| +000001a0 61 73 69 6c 79 1a 07 53 69 64 6f 72 6f 76 20 01 |asily..Sidorov .| +000001b0 28 fb 48 32 03 62 6d 70 3a 0d 2b 34 34 32 30 31 |(.H2.bmp:.+44201| +000001c0 32 33 34 35 36 37 38 40 01 4d 50 e0 27 5c 50 17 
|2345678@.MP.'\P.| +000001d0 58 04 62 05 53 75 6e 6e 79 6a 05 fa 01 f4 01 0a |X.b.Sunnyj......| +000001e0 72 08 4d 75 72 6d 61 6e 73 6b 7a 08 fd f0 89 42 |r.Murmanskz....B| +000001f0 c8 4c 04 42 81 01 11 2d 44 54 fb 21 09 40 89 01 |.L.B...-DT.!.@..| +00000200 00 00 00 e8 76 48 37 42 95 01 00 00 48 44 9d 01 |....vH7B....HD..| +00000210 cd cc 4c c0 a0 01 80 d4 9f 93 01 aa 01 0c 0a 05 |..L.............| +00000220 70 6f 75 6e 64 15 00 00 80 41 b2 01 0a 0a 08 a2 |pound....A......| +00000230 06 05 0a 03 08 f7 03 |.......| +00000237 + +MESSAGE #1 AT 0x00000002 +uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49" +name: "Ivan" +surname: "Petrov" +gender: male +birthDate: 4015 +photo: "png" +phoneNumber: "+74951234567\000" +isOnline: true +visitTime: 1546703100 +age: 38 +zodiacSign: capricorn +songs: "Yesterday" +songs: "Flowers" +color: 255 +color: 0 +color: 0 +hometown: "Moscow" +location: 55.7532158 +location: 37.6225052 +pi: 3.14 +lotteryWin: 214.1 +someRatio: 0.1 +temperature: 5.8 +randomBigNumber: 17060000000 +measureUnits { + unit: "meter" + coef: 1 +} +measureUnits { + unit: "centimeter" + coef: 0.01 +} +measureUnits { + unit: "kilometer" + coef: 1000 +} +nestiness { + a { + b { + c { + d: 500 + e: 501 + e: 502 + } + } + } +} +MESSAGE #2 AT 0x000000F7 +uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5" +name: "Natalia" +surname: "Sokolova" +birthDate: 8102 +photo: "jpg" +age: 26 +zodiacSign: pisces +color: 100 +color: 200 +color: 50 +hometown: "Plymouth" +location: 50.4037247 +location: -4.14212322 +pi: 3.14159 +someRatio: 0.007 +temperature: 5.4 +randomBigNumber: -20000000000000 +MESSAGE #3 AT 0x00000177 +uuid: "a7da1aa6-f425-4789-8947-b034786ed374" +name: "Vasily" +surname: "Sidorov" +gender: male +birthDate: 9339 +photo: "bmp" +phoneNumber: "+442012345678" +isOnline: true +visitTime: 1546117200 +age: 23 +zodiacSign: leo +songs: "Sunny" +color: 250 +color: 244 +color: 10 +hometown: "Murmansk" +location: 68.9706802 +location: 33.0749817 +pi: 3.14159265358979 +lotteryWin: 100000000000 +someRatio: 800 +temperature: -3.2 +randomBigNumber: 154400000 +measureUnits { + unit: "pound" + coef: 16 +} +nestiness { + a { + b { + c { + d: 503 + } + } + } +} + +Binary representation is as expected + +Roundtrip: +a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] +c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] +a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] + +Schema 00825_protobuf_format_persons:AltPerson + +Binary representation: +00000000 c4 01 08 01 12 04 49 76 61 6e 28 87 a8 c4 9b 97 |......Ivan(.....| +00000010 02 52 06 50 65 74 72 6f 76 72 0c 00 00 7f 43 00 |.R.Petrovr....C.| +00000020 00 00 00 00 00 00 00 79 fc d0 30 5c 00 00 00 00 |.......y..0\....| +00000030 c8 02 0a c2 05 0c 00 00 80 3f 0a d7 23 3c 00 00 |.........?..#<..| +00000040 7a 44 9a 06 05 6d 65 74 65 72 9a 06 0a 63 65 6e |zD...meter...cen| +00000050 74 69 6d 65 74 65 72 9a 06 09 6b 69 6c 6f 6d 65 |timeter...kilome| +00000060 74 65 72 a1 06 00 00 00 a0 99 99 b9 3f a8 06 37 |ter.........?..7| 
+00000070 a8 06 25 bd 06 c3 f5 48 40 fa 06 02 33 38 90 08 |..%....H@...38..| +00000080 c6 09 e1 08 00 f1 da f8 03 00 00 00 b0 09 af 1f |................| +00000090 d0 0c d6 01 e2 12 24 61 37 35 32 32 31 35 38 2d |......$a7522158-| +000000a0 33 64 34 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 |3d41-4b77-ad69-6| +000000b0 63 35 39 38 65 65 35 35 63 34 39 a0 38 f4 03 aa |c598ee55c49.8...| +000000c0 38 04 f5 03 f6 03 84 01 12 07 4e 61 74 61 6c 69 |8.........Natali| +000000d0 61 52 08 53 6f 6b 6f 6c 6f 76 61 72 0c 00 00 c8 |aR.Sokolovar....| +000000e0 42 00 00 48 43 00 00 48 42 c8 02 0a a1 06 00 00 |B..HC..HB.......| +000000f0 00 40 08 ac 7c 3f a8 06 32 a8 06 fc ff ff ff ff |.@..|?..2.......| +00000100 ff ff ff ff 01 b0 06 01 bd 06 d0 0f 49 40 fa 06 |............I@..| +00000110 02 32 36 90 08 db 01 e1 08 00 c0 1a 63 cf ed ff |.26.........c...| +00000120 ff b0 09 a6 3f e2 12 24 63 36 39 34 61 64 38 61 |....?..$c694ad8a| +00000130 2d 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 2d |-f714-4ea3-907d-| +00000140 66 64 35 34 66 62 32 35 64 39 62 35 a3 01 08 01 |fd54fb25d9b5....| +00000150 12 06 56 61 73 69 6c 79 28 ce ca f4 cf ee 0c 52 |..Vasily(......R| +00000160 07 53 69 64 6f 72 6f 76 72 0c 00 00 7a 43 00 00 |.Sidorovr...zC..| +00000170 74 43 00 00 20 41 79 50 e0 27 5c 00 00 00 00 c8 |tC.. AyP.'\.....| +00000180 02 05 c2 05 04 00 00 80 41 9a 06 05 70 6f 75 6e |........A...poun| +00000190 64 a1 06 00 00 00 00 00 00 89 40 a8 06 44 a8 06 |d.........@..D..| +000001a0 21 bd 06 db 0f 49 40 fa 06 02 32 33 90 08 d3 05 |!....I@...23....| +000001b0 e1 08 00 f5 33 09 00 00 00 00 b0 09 fb 48 d0 0c |....3........H..| +000001c0 80 d0 db c3 f4 02 e2 12 24 61 37 64 61 31 61 61 |........$a7da1aa| +000001d0 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 39 34 37 |6-f425-4789-8947| +000001e0 2d 62 30 33 34 37 38 36 65 64 33 37 34 a0 38 f7 |-b034786ed374.8.| +000001f0 03 |.| +000001f1 + +MESSAGE #1 AT 0x00000002 +isOnline: online +name: "Ivan" +phoneNumber: 74951234567 +surname: "Petrov" +color: 255 +color: 0 +color: 0 +visitTime: 1546703100 +temperature: 5 +measureUnits_coef: 1 +measureUnits_coef: 0.01 +measureUnits_coef: 1000 +measureUnits_unit: "meter" +measureUnits_unit: "centimeter" +measureUnits_unit: "kilometer" +someRatio: 0.10000000149011612 +location: 55 +location: 37 +pi: 3.14 +age: "38" +zodiacSign: 1222 +randomBigNumber: 17060000000 +birthDate: 4015 +lotteryWin: 214 +uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49" +nestiness_a_b_c_d: 500 +nestiness_a_b_c_e: 501 +nestiness_a_b_c_e: 502 +MESSAGE #2 AT 0x000000C8 +name: "Natalia" +surname: "Sokolova" +color: 100 +color: 200 +color: 50 +temperature: 5 +someRatio: 0.0070000002160668373 +location: 50 +location: -4 +gender: female +pi: 3.14159 +age: "26" +zodiacSign: 219 +randomBigNumber: -20000000000000 +birthDate: 8102 +uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5" +MESSAGE #3 AT 0x0000014E +isOnline: online +name: "Vasily" +phoneNumber: 442012345678 +surname: "Sidorov" +color: 250 +color: 244 +color: 10 +visitTime: 1546117200 +temperature: -3 +measureUnits_coef: 16 +measureUnits_unit: "pound" +someRatio: 800 +location: 68 +location: 33 +pi: 3.14159274 +age: "23" +zodiacSign: 723 +randomBigNumber: 154400000 +birthDate: 9339 +lotteryWin: 100000000000 +uuid: "a7da1aa6-f425-4789-8947-b034786ed374" +nestiness_a_b_c_d: 503 + +Binary representation is as expected + +Roundtrip: +a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 \N 74951234567\0\0 1 2019-01-05 18:45:00 38 capricorn [] [255,0,0] [55.000000,37.000000] 3.140000104904175 214.00 0.1 5.0 17060000000 
['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] +c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 \N \N 0 \N 26 pisces [] [100,200,50] [50.000000,-4.000000] 3.141590118408203 \N 0.007 5.0 -20000000000000 [] [] \N [] +a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 \N 442012345678\0 1 2018-12-30 00:00:00 23 leo [] [250,244,10] [68.000000,33.000000] 3.1415927410125732 100000000000.00 800 -3.0 154400000 ['pound'] [16] 503 [] + +Schema 00825_protobuf_format_persons:StrPerson + +Binary representation: +00000000 a7 02 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4| +00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59| +00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..| +00000030 50 65 74 72 6f 76 22 04 6d 61 6c 65 2a 0a 31 39 |Petrov".male*.19| +00000040 38 30 2d 31 32 2d 32 39 3a 0d 2b 37 34 39 35 31 |80-12-29:.+74951| +00000050 32 33 34 35 36 37 00 42 01 31 4a 13 32 30 31 39 |234567.B.1J.2019| +00000060 2d 30 31 2d 30 35 20 31 38 3a 34 35 3a 30 30 52 |-01-05 18:45:00R| +00000070 02 33 38 5a 09 63 61 70 72 69 63 6f 72 6e 62 09 |.38Z.capricornb.| +00000080 59 65 73 74 65 72 64 61 79 62 07 46 6c 6f 77 65 |Yesterdayb.Flowe| +00000090 72 73 6a 03 32 35 35 6a 01 30 6a 01 30 72 06 4d |rsj.255j.0j.0r.M| +000000a0 6f 73 63 6f 77 7a 09 35 35 2e 37 35 33 32 31 35 |oscowz.55.753215| +000000b0 7a 09 33 37 2e 36 32 32 35 30 34 82 01 04 33 2e |z.37.622504...3.| +000000c0 31 34 8a 01 06 32 31 34 2e 31 30 92 01 03 30 2e |14...214.10...0.| +000000d0 31 9a 01 03 35 2e 38 a2 01 0b 31 37 30 36 30 30 |1...5.8...170600| +000000e0 30 30 30 30 30 aa 01 2d 0a 05 6d 65 74 65 72 0a |00000..-..meter.| +000000f0 0a 63 65 6e 74 69 6d 65 74 65 72 0a 09 6b 69 6c |.centimeter..kil| +00000100 6f 6d 65 74 65 72 12 01 31 12 04 30 2e 30 31 12 |ometer..1..0.01.| +00000110 04 31 30 30 30 b2 01 11 0a 0f 0a 03 35 30 30 12 |.1000.......500.| +00000120 03 35 30 31 12 03 35 30 32 b4 01 0a 24 63 36 39 |.501..502...$c69| +00000130 34 61 64 38 61 2d 66 37 31 34 2d 34 65 61 33 2d |4ad8a-f714-4ea3-| +00000140 39 30 37 64 2d 66 64 35 34 66 62 32 35 64 39 62 |907d-fd54fb25d9b| +00000150 35 12 07 4e 61 74 61 6c 69 61 1a 08 53 6f 6b 6f |5..Natalia..Soko| +00000160 6c 6f 76 61 22 06 66 65 6d 61 6c 65 2a 0a 31 39 |lova".female*.19| +00000170 39 32 2d 30 33 2d 30 38 42 01 30 52 02 32 36 5a |92-03-08B.0R.26Z| +00000180 06 70 69 73 63 65 73 6a 03 31 30 30 6a 03 32 30 |.piscesj.100j.20| +00000190 30 6a 02 35 30 72 08 50 6c 79 6d 6f 75 74 68 7a |0j.50r.Plymouthz| +000001a0 09 35 30 2e 34 30 33 37 32 34 7a 09 2d 34 2e 31 |.50.403724z.-4.1| +000001b0 34 32 31 32 33 82 01 07 33 2e 31 34 31 35 39 92 |42123...3.14159.| +000001c0 01 05 30 2e 30 30 37 9a 01 03 35 2e 34 a2 01 0f |..0.007...5.4...| +000001d0 2d 32 30 30 30 30 30 30 30 30 30 30 30 30 30 84 |-20000000000000.| +000001e0 02 0a 24 61 37 64 61 31 61 61 36 2d 66 34 32 35 |..$a7da1aa6-f425| +000001f0 2d 34 37 38 39 2d 38 39 34 37 2d 62 30 33 34 37 |-4789-8947-b0347| +00000200 38 36 65 64 33 37 34 12 06 56 61 73 69 6c 79 1a |86ed374..Vasily.| +00000210 07 53 69 64 6f 72 6f 76 22 04 6d 61 6c 65 2a 0a |.Sidorov".male*.| +00000220 31 39 39 35 2d 30 37 2d 32 38 3a 0d 2b 34 34 32 |1995-07-28:.+442| +00000230 30 31 32 33 34 35 36 37 38 42 01 31 4a 13 32 30 |012345678B.1J.20| +00000240 31 38 2d 31 32 2d 33 30 20 30 30 3a 30 30 3a 30 |18-12-30 00:00:0| +00000250 30 52 02 32 33 5a 03 6c 65 6f 62 05 53 75 6e 6e |0R.23Z.leob.Sunn| +00000260 79 6a 03 32 35 30 6a 03 32 34 34 6a 02 31 30 72 
|yj.250j.244j.10r| +00000270 08 4d 75 72 6d 61 6e 73 6b 7a 09 36 38 2e 39 37 |.Murmanskz.68.97| +00000280 30 36 38 32 7a 09 33 33 2e 30 37 34 39 38 31 82 |0682z.33.074981.| +00000290 01 10 33 2e 31 34 31 35 39 32 36 35 33 35 38 39 |..3.141592653589| +000002a0 37 39 8a 01 0f 31 30 30 30 30 30 30 30 30 30 30 |79...10000000000| +000002b0 30 2e 30 30 92 01 03 38 30 30 9a 01 04 2d 33 2e |0.00...800...-3.| +000002c0 32 a2 01 09 31 35 34 34 30 30 30 30 30 aa 01 0b |2...154400000...| +000002d0 0a 05 70 6f 75 6e 64 12 02 31 36 b2 01 07 0a 05 |..pound..16.....| +000002e0 0a 03 35 30 33 |..503| +000002e5 + +MESSAGE #1 AT 0x00000002 +uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49" +name: "Ivan" +surname: "Petrov" +gender: "male" +birthDate: "1980-12-29" +phoneNumber: "+74951234567\000" +isOnline: "1" +visitTime: "2019-01-05 18:45:00" +age: "38" +zodiacSign: "capricorn" +songs: "Yesterday" +songs: "Flowers" +color: "255" +color: "0" +color: "0" +hometown: "Moscow" +location: "55.753215" +location: "37.622504" +pi: "3.14" +lotteryWin: "214.10" +someRatio: "0.1" +temperature: "5.8" +randomBigNumber: "17060000000" +measureUnits { + unit: "meter" + unit: "centimeter" + unit: "kilometer" + coef: "1" + coef: "0.01" + coef: "1000" +} +nestiness_a { + b_c { + d: "500" + e: "501" + e: "502" + } +} +MESSAGE #2 AT 0x0000012B +uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5" +name: "Natalia" +surname: "Sokolova" +gender: "female" +birthDate: "1992-03-08" +isOnline: "0" +age: "26" +zodiacSign: "pisces" +color: "100" +color: "200" +color: "50" +hometown: "Plymouth" +location: "50.403724" +location: "-4.142123" +pi: "3.14159" +someRatio: "0.007" +temperature: "5.4" +randomBigNumber: "-20000000000000" +MESSAGE #3 AT 0x000001E1 +uuid: "a7da1aa6-f425-4789-8947-b034786ed374" +name: "Vasily" +surname: "Sidorov" +gender: "male" +birthDate: "1995-07-28" +phoneNumber: "+442012345678" +isOnline: "1" +visitTime: "2018-12-30 00:00:00" +age: "23" +zodiacSign: "leo" +songs: "Sunny" +color: "250" +color: "244" +color: "10" +hometown: "Murmansk" +location: "68.970682" +location: "33.074981" +pi: "3.14159265358979" +lotteryWin: "100000000000.00" +someRatio: "800" +temperature: "-3.2" +randomBigNumber: "154400000" +measureUnits { + unit: "pound" + coef: "16" +} +nestiness_a { + b_c { + d: "503" + } +} + +Binary representation is as expected + +Roundtrip: +a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 \N +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753215,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] +c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 \N \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] +a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 \N +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970682,33.074981] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] + +Schema 00825_protobuf_format_syntax2:Syntax2Person + +Binary representation: +00000000 f1 01 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4| +00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59| +00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..| +00000030 50 65 74 72 6f 76 20 01 28 af 1f 32 03 70 6e 67 |Petrov .(..2.png| +00000040 3a 0d 2b 37 34 39 35 31 32 33 34 35 36 37 00 40 |:.+74951234567.@| +00000050 01 4d fc d0 30 5c 50 26 
58 09 62 09 59 65 73 74 |.M..0\P&X.b.Yest| +00000060 65 72 64 61 79 62 07 46 6c 6f 77 65 72 73 68 ff |erdayb.Flowersh.| +00000070 01 68 00 68 00 72 06 4d 6f 73 63 6f 77 7a 08 4b |.h.h.r.Moscowz.K| +00000080 03 5f 42 72 7d 16 42 81 01 1f 85 eb 51 b8 1e 09 |._Br}.B.....Q...| +00000090 40 89 01 33 33 33 33 33 c3 6a 40 95 01 cd cc cc |@..33333.j@.....| +000000a0 3d 9d 01 9a 99 b9 40 a0 01 80 c4 d7 8d 7f ab 01 |=.....@.........| +000000b0 0d 00 00 80 3f 0d 0a d7 23 3c 0d 00 00 7a 44 12 |....?...#<...zD.| +000000c0 05 6d 65 74 65 72 12 0a 63 65 6e 74 69 6d 65 74 |.meter..centimet| +000000d0 65 72 12 09 6b 69 6c 6f 6d 65 74 65 72 ac 01 b3 |er..kilometer...| +000000e0 01 0b a2 06 0b 0b 08 f4 03 10 f5 03 10 f6 03 0c |................| +000000f0 0c b4 01 83 01 0a 24 63 36 39 34 61 64 38 61 2d |......$c694ad8a-| +00000100 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 2d 66 |f714-4ea3-907d-f| +00000110 64 35 34 66 62 32 35 64 39 62 35 12 07 4e 61 74 |d54fb25d9b5..Nat| +00000120 61 6c 69 61 1a 08 53 6f 6b 6f 6c 6f 76 61 20 00 |alia..Sokolova .| +00000130 28 a6 3f 32 03 6a 70 67 40 00 50 1a 58 0b 68 64 |(.?2.jpg@.P.X.hd| +00000140 68 c8 01 68 32 72 08 50 6c 79 6d 6f 75 74 68 7a |h..h2r.Plymouthz| +00000150 08 6a 9d 49 42 46 8c 84 c0 81 01 6e 86 1b f0 f9 |.j.IBF.....n....| +00000160 21 09 40 95 01 42 60 e5 3b 9d 01 cd cc ac 40 a0 |!.@..B`.;.....@.| +00000170 01 ff ff a9 ce 93 8c 09 c3 01 0a 24 61 37 64 61 |...........$a7da| +00000180 31 61 61 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 |1aa6-f425-4789-8| +00000190 39 34 37 2d 62 30 33 34 37 38 36 65 64 33 37 34 |947-b034786ed374| +000001a0 12 06 56 61 73 69 6c 79 1a 07 53 69 64 6f 72 6f |..Vasily..Sidoro| +000001b0 76 20 01 28 fb 48 32 03 62 6d 70 3a 0d 2b 34 34 |v .(.H2.bmp:.+44| +000001c0 32 30 31 32 33 34 35 36 37 38 40 01 4d 50 e0 27 |2012345678@.MP.'| +000001d0 5c 50 17 58 04 62 05 53 75 6e 6e 79 68 fa 01 68 |\P.X.b.Sunnyh..h| +000001e0 f4 01 68 0a 72 08 4d 75 72 6d 61 6e 73 6b 7a 08 |..h.r.Murmanskz.| +000001f0 fd f0 89 42 c8 4c 04 42 81 01 11 2d 44 54 fb 21 |...B.L.B...-DT.!| +00000200 09 40 89 01 00 00 00 e8 76 48 37 42 95 01 00 00 |.@......vH7B....| +00000210 48 44 9d 01 cd cc 4c c0 a0 01 80 d4 9f 93 01 ab |HD....L.........| +00000220 01 0d 00 00 80 41 12 05 70 6f 75 6e 64 ac 01 b3 |.....A..pound...| +00000230 01 0b a2 06 05 0b 08 f7 03 0c 0c b4 01 |.............| +0000023d + +MESSAGE #1 AT 0x00000002 +uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49" +name: "Ivan" +surname: "Petrov" +gender: male +birthDate: 4015 +photo: "png" +phoneNumber: "+74951234567\000" +isOnline: true +visitTime: 1546703100 +age: 38 +zodiacSign: capricorn +songs: "Yesterday" +songs: "Flowers" +color: 255 +color: 0 +color: 0 +hometown: "Moscow" +location: 55.7532158 +location: 37.6225052 +pi: 3.14 +lotteryWin: 214.1 +someRatio: 0.1 +temperature: 5.8 +randomBigNumber: 17060000000 +MeasureUnits { + coef: 1 + coef: 0.01 + coef: 1000 + unit: "meter" + unit: "centimeter" + unit: "kilometer" +} +Nestiness { + A { + b { + C { + d: 500 + e: 501 + e: 502 + } + } + } +} +MESSAGE #2 AT 0x000000F5 +uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5" +name: "Natalia" +surname: "Sokolova" +gender: female +birthDate: 8102 +photo: "jpg" +isOnline: false +age: 26 +zodiacSign: pisces +color: 100 +color: 200 +color: 50 +hometown: "Plymouth" +location: 50.4037247 +location: -4.14212322 +pi: 3.14159 +someRatio: 0.007 +temperature: 5.4 +randomBigNumber: -20000000000000 +MESSAGE #3 AT 0x0000017A +uuid: "a7da1aa6-f425-4789-8947-b034786ed374" +name: "Vasily" +surname: "Sidorov" +gender: male +birthDate: 9339 +photo: 
"bmp" +phoneNumber: "+442012345678" +isOnline: true +visitTime: 1546117200 +age: 23 +zodiacSign: leo +songs: "Sunny" +color: 250 +color: 244 +color: 10 +hometown: "Murmansk" +location: 68.9706802 +location: 33.0749817 +pi: 3.14159265358979 +lotteryWin: 100000000000 +someRatio: 800 +temperature: -3.2 +randomBigNumber: 154400000 +MeasureUnits { + coef: 16 + unit: "pound" +} +Nestiness { + A { + b { + C { + d: 503 + } + } + } +} + +Binary representation is as expected + +Roundtrip: +a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502] +c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N [] +a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 [] diff --git a/tests/queries/0_stateless/00825_protobuf_format_persons.sh b/tests/queries/0_stateless/00825_protobuf_format_persons.sh new file mode 100755 index 00000000000..957ed738d99 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_persons.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash + +# To generate reference file for this test use the following commands: +# ninja ProtobufDelimitedMessagesSerializer +# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery < $BINARY_FILE_PATH +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:Person" --input "$BINARY_FILE_PATH" +echo +echo "Roundtrip:" +$CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_persons_00825 AS persons_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:Person'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_persons_00825 ORDER BY name" +rm "$BINARY_FILE_PATH" + +# Use schema 00825_protobuf_format_persons:AltPerson +echo +echo "Schema 00825_protobuf_format_persons:AltPerson" +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons:AltPerson'" > $BINARY_FILE_PATH +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:AltPerson" --input "$BINARY_FILE_PATH" +echo +echo "Roundtrip:" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alt_persons_00825 AS persons_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:AltPerson'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM alt_persons_00825 ORDER BY name" +rm "$BINARY_FILE_PATH" + +# Use schema 00825_protobuf_format_persons:StrPerson +echo +echo "Schema 00825_protobuf_format_persons:StrPerson" +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons:StrPerson'" > $BINARY_FILE_PATH +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:StrPerson" --input "$BINARY_FILE_PATH" +echo +echo "Roundtrip:" +$CLICKHOUSE_CLIENT --query "CREATE TABLE str_persons_00825 AS persons_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO str_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:StrPerson'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM str_persons_00825 ORDER BY name" +rm "$BINARY_FILE_PATH" + +# Use schema 00825_protobuf_format_syntax2:Syntax2Person +echo +echo "Schema 00825_protobuf_format_syntax2:Syntax2Person" +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'" > $BINARY_FILE_PATH +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person" --input "$BINARY_FILE_PATH" +echo +echo "Roundtrip:" +$CLICKHOUSE_CLIENT --query "CREATE TABLE syntax2_persons_00825 AS persons_00825" +$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM syntax2_persons_00825 ORDER BY name" +rm "$BINARY_FILE_PATH" + +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# 
Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO squares_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM squares_protobuf_00825" + +rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE squares_protobuf_00825" diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh index 97f7769269a..96664ca7f20 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh set -eo pipefail # Run the client. -$CLICKHOUSE_CLIENT --multiquery <<'EOF' +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH" # Check the output in the protobuf format echo -$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH" +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z" rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE table_default_protobuf_00825" diff --git a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh index e5f4d12ee18..22ab745d7c0 100755 --- a/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh +++ b/tests/queries/0_stateless/01057_http_compression_prefer_brotli.sh @@ -11,3 +11,4 @@ ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip,deflate,br' "${CLICKHOUSE_URL}& ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip,deflate' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT 1' | gzip -d ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | gzip -d | tail -n3 ${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM numbers(1000000)' | brotli -d | tail -n3 + diff --git a/tests/queries/0_stateless/01272_totals_and_filter_bug.reference b/tests/queries/0_stateless/01272_totals_and_filter_bug.reference index 0db840561fd..5b407738cb8 100644 --- a/tests/queries/0_stateless/01272_totals_and_filter_bug.reference +++ b/tests/queries/0_stateless/01272_totals_and_filter_bug.reference @@ -1,6 +1,6 @@ 1 1 -0 2 +0 1 - test1 10 0 diff --git a/tests/queries/0_stateless/01550_create_map_type.reference b/tests/queries/0_stateless/01550_create_map_type.reference index 877199e92d5..d4794ac1459 100644 --- a/tests/queries/0_stateless/01550_create_map_type.reference +++ b/tests/queries/0_stateless/01550_create_map_type.reference @@ -6,6 +6,8 @@ lisi female zhangsan gender +2020-01-01 {1:0,2:1} +2020-01-01 {1:0,2:-1} 1116 1117 1118 diff --git a/tests/queries/0_stateless/01550_create_map_type.sql b/tests/queries/0_stateless/01550_create_map_type.sql index 695a40c42b4..26bbf3c7dde 100644 --- a/tests/queries/0_stateless/01550_create_map_type.sql +++ b/tests/queries/0_stateless/01550_create_map_type.sql @@ -22,7 +22,21 @@ select a[b] from table_map; select b from table_map where a = map('name','lisi', 'gender', 'female'); drop table if exists table_map; --- Int type +-- Big Integer type + +create table table_map (d DATE, m Map(Int8, UInt256)) ENGINE = MergeTree() order by d; +insert into table_map values ('2020-01-01', map(1, 0, 2, 1)); +select * from table_map; +drop table table_map; + +-- Integer type + +create table table_map (d DATE, m Map(Int8, Int8)) ENGINE = MergeTree() order by d; +insert into table_map values ('2020-01-01', map(1, 0, 2, -1)); +select * from table_map; +drop table table_map; + +-- Unsigned Int type drop table if exists table_map; create table table_map(a Map(UInt8, UInt64), b UInt8) Engine = MergeTree() order by b; insert into table_map select map(number, number+5), number from numbers(1111,4); diff --git a/tests/queries/0_stateless/01592_long_window_functions1.reference b/tests/queries/0_stateless/01592_long_window_functions1.reference new file mode 100644 index 00000000000..5160cca9c3e --- /dev/null +++ 
b/tests/queries/0_stateless/01592_long_window_functions1.reference @@ -0,0 +1,4 @@ +---- arrays ---- +6360452672161319041 +---- window f ---- +6360452672161319041 diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql new file mode 100644 index 00000000000..bb0f77ff60a --- /dev/null +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -0,0 +1,36 @@ +drop table if exists stack; + +set allow_experimental_window_functions = 1; +set max_insert_threads = 4; + +create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) +Engine = MergeTree +partition by toYYYYMM(dt) +order by (brand_id, toStartOfHour(dt)); + +insert into stack +select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, + toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1 +from numbers_mt(10000000); + +select '---- arrays ----'; + +select cityHash64( toString( groupArray (tuple(*) ) )) from ( + select brand_id, rack_id, arrayJoin(arraySlice(arraySort(groupArray(quantity)),1,2)) quantity + from stack + group by brand_id, rack_id + order by brand_id, rack_id, quantity +) t; + + +select '---- window f ----'; + +select cityHash64( toString( groupArray (tuple(*) ) )) from ( + select brand_id, rack_id, quantity from + ( select brand_id, rack_id, quantity, row_number() over (partition by brand_id, rack_id order by quantity) rn + from stack ) as t0 + where rn <= 2 + order by brand_id, rack_id, quantity +) t; + +drop table if exists stack; diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference new file mode 100644 index 00000000000..8ca5086821b --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -0,0 +1,81 @@ +---- Q1 ---- +Dell Vostro 800.00 Laptop 850 +HP Elite 1200.00 Laptop 850 +Lenovo Thinkpad 700.00 Laptop 850 +Sony VAIO 700.00 Laptop 850 +HTC One 400.00 Smartphone 500 +Microsoft Lumia 200.00 Smartphone 500 +Nexus 500.00 Smartphone 500 +iPhone 900.00 Smartphone 500 +Kindle Fire 150.00 Tablet 350 +Samsung Galaxy Tab 200.00 Tablet 350 +iPad 700.00 Tablet 350 +---- Q2 ---- +Lenovo Thinkpad Laptop 700.00 1 +Sony VAIO Laptop 700.00 1 +Dell Vostro Laptop 800.00 3 +HP Elite Laptop 1200.00 4 +Microsoft Lumia Smartphone 200.00 1 +HTC One Smartphone 400.00 2 +Nexus Smartphone 500.00 3 +iPhone Smartphone 900.00 4 +Kindle Fire Tablet 150.00 1 +Samsung Galaxy Tab Tablet 200.00 2 +iPad Tablet 700.00 3 +---- Q3 ---- +HP Elite Laptop 1200.00 1 +Dell Vostro Laptop 800.00 2 +Lenovo Thinkpad Laptop 700.00 3 +Sony VAIO Laptop 700.00 4 +iPhone Smartphone 900.00 1 +Nexus Smartphone 500.00 2 +HTC One Smartphone 400.00 3 +Microsoft Lumia Smartphone 200.00 4 +iPad Tablet 700.00 1 +Samsung Galaxy Tab Tablet 200.00 2 +Kindle Fire Tablet 150.00 3 +---- Q4 ---- +Lenovo Thinkpad Laptop 700.00 700.00 1 +Sony VAIO Laptop 700.00 700.00 1 +Dell Vostro Laptop 800.00 700.00 2 +HP Elite Laptop 1200.00 700.00 3 +Microsoft Lumia Smartphone 200.00 200.00 1 +HTC One Smartphone 400.00 200.00 2 +Nexus Smartphone 500.00 200.00 3 +iPhone Smartphone 900.00 200.00 4 +---- Q5 ---- +Sony VAIO Laptop 700.00 700.00 +Lenovo Thinkpad Laptop 700.00 700.00 +HP Elite Laptop 1200.00 700.00 +Dell Vostro Laptop 800.00 700.00 +iPhone Smartphone 900.00 900.00 +Nexus Smartphone 500.00 900.00 +Microsoft Lumia Smartphone 200.00 900.00 +HTC One Smartphone 400.00 900.00 +iPad Tablet 700.00 700.00 
+Samsung Galaxy Tab Tablet 200.00 700.00 +Kindle Fire Tablet 150.00 700.00 +---- Q6 ---- +Dell Vostro Laptop 800.00 1200.00 +HP Elite Laptop 1200.00 1200.00 +Lenovo Thinkpad Laptop 700.00 1200.00 +Sony VAIO Laptop 700.00 1200.00 +HTC One Smartphone 400.00 900.00 +Microsoft Lumia Smartphone 200.00 900.00 +Nexus Smartphone 500.00 900.00 +iPhone Smartphone 900.00 900.00 +Kindle Fire Tablet 150.00 700.00 +Samsung Galaxy Tab Tablet 200.00 700.00 +iPad Tablet 700.00 700.00 +---- Q7 ---- +Dell Vostro 800.00 Laptop 733 850 +HP Elite 1200.00 Laptop 850 850 +Lenovo Thinkpad 700.00 Laptop 700 850 +Sony VAIO 700.00 Laptop 700 850 +HTC One 400.00 Smartphone 300 500 +Microsoft Lumia 200.00 Smartphone 200 500 +Nexus 500.00 Smartphone 367 500 +iPhone 900.00 Smartphone 500 500 +Kindle Fire 150.00 Tablet 150 350 +Samsung Galaxy Tab 200.00 Tablet 175 350 +iPad 700.00 Tablet 350 350 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql new file mode 100644 index 00000000000..8d5033fc821 --- /dev/null +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -0,0 +1,107 @@ +set allow_experimental_window_functions = 1; + +drop table if exists product_groups; +drop table if exists products; + +CREATE TABLE product_groups ( + group_id Int64, + group_name String +) Engine = Memory; + + +CREATE TABLE products ( + product_id Int64, + product_name String, + price DECIMAL(11, 2), + group_id Int64 +) Engine = Memory; + +INSERT INTO product_groups VALUES (1, 'Smartphone'),(2, 'Laptop'),(3, 'Tablet'); + +INSERT INTO products (product_id,product_name, group_id,price) VALUES (1, 'Microsoft Lumia', 1, 200), (2, 'HTC One', 1, 400), (3, 'Nexus', 1, 500), (4, 'iPhone', 1, 900),(5, 'HP Elite', 2, 1200),(6, 'Lenovo Thinkpad', 2, 700),(7, 'Sony VAIO', 2, 700),(8, 'Dell Vostro', 2, 800),(9, 'iPad', 3, 700),(10, 'Kindle Fire', 3, 150),(11, 'Samsung Galaxy Tab', 3, 200); + +select '---- Q1 ----'; + +SELECT + product_name, + price, + group_name, + AVG(price) OVER (PARTITION BY group_name) +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, product_name, price; + +select '---- Q2 ----'; + +SELECT + product_name, + group_name, + price, + rank() OVER (PARTITION BY group_name ORDER BY price) rank +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, rank, price; + +select '---- Q3 ----'; +SELECT + product_name, + group_name, + price, + row_number() OVER (PARTITION BY group_name ORDER BY price desc) rn +FROM products INNER JOIN product_groups USING (group_id) +ORDER BY group_name, rn; + +select '---- Q4 ----'; +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + min(price) OVER (PARTITION BY group_name) AS min_price, + dense_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +WHERE min_price > 160 +ORDER BY + group_name ASC, + r ASC, + product_name ASC; + +select '---- Q5 ----'; +SELECT + product_name, + group_name, + price, + FIRST_VALUE (price) OVER (PARTITION BY group_name ORDER BY product_name desc) AS price_per_group_per_alphab +FROM products INNER JOIN product_groups USING (group_id) +order by group_name, product_name desc; + +select '---- Q6 ----'; +SELECT + product_name, + group_name, + price, + LAST_VALUE (price) OVER (PARTITION BY group_name ORDER BY + price RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS highest_price_per_group +FROM + products +INNER JOIN product_groups USING (group_id) +order 
by group_name, product_name; + +select '---- Q7 ----'; +select product_name, price, group_name, round(avg0), round(avg1) +from ( +SELECT + product_name, + price, + group_name, + avg(price) OVER (PARTITION BY group_name ORDER BY price) avg0, + avg(price) OVER (PARTITION BY group_name ORDER BY + price RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) avg1 +FROM products INNER JOIN product_groups USING (group_id)) t +order by group_name, product_name, price; + +drop table product_groups; +drop table products; diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.reference b/tests/queries/0_stateless/01637_nullable_fuzz3.reference index d9cf16b9d2a..795a0159ac8 100644 --- a/tests/queries/0_stateless/01637_nullable_fuzz3.reference +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.reference @@ -1,2 +1,6 @@ 0 + +0 + +0 0 0 diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.sql b/tests/queries/0_stateless/01637_nullable_fuzz3.sql index 21bf8999eae..6cfd0fc7d1c 100644 --- a/tests/queries/0_stateless/01637_nullable_fuzz3.sql +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.sql @@ -1,4 +1,6 @@ DROP TABLE IF EXISTS t; CREATE TABLE t (`item_id` UInt64, `price_sold` Float32, `date` Date) ENGINE = MergeTree ORDER BY item_id; SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id); +SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id) SETTINGS join_use_nulls = '1'; +SELECT * FROM (SELECT item_id, sum(price_sold) as price_sold FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id, sum(price_sold) as price_sold FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id) SETTINGS join_use_nulls = '1'; DROP TABLE t; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index fda40305f9d..99b32b74ca7 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -1,7 +1,9 @@ -sipHash should be calculated after filtration +Too many optimizations applied to query plan +Too many optimizations applied to query plan +> sipHash should be calculated after filtration FUNCTION sipHash64 Filter column: equals -sorting steps should know about limit +> sorting steps should know about limit Limit 10 MergingSorted Limit 10 @@ -9,3 +11,115 @@ MergeSorting Limit 10 PartialSorting Limit 10 +-- filter push down -- +> filter should be pushed down after aggregating +Aggregating +Filter +0 1 +1 2 +2 3 +3 4 +4 5 +5 6 +6 7 +7 8 +8 9 +9 10 +> filter should be pushed down after aggregating, column after aggregation is const +COLUMN Const(UInt8) -> notEquals(y, 0) +Aggregating +Filter +Filter +0 1 1 +1 2 1 +2 3 1 +3 4 1 +4 5 1 +5 6 1 +6 7 1 +7 8 1 +8 9 1 +9 10 1 +> one condition of filter should be pushed down after aggregating, other condition is aliased +Filter column +ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +8 9 +9 10 +> one condition of filter should be pushed down after aggregating, other condition is casted +Filter column +FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +8 9 +9 10 
+> one condition of filter should be pushed down after aggregating, other two conditions are ANDed +Filter column +FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +Aggregating +Filter column: notEquals(y, 0) +0 1 +1 2 +2 3 +3 4 +5 6 +6 7 +7 8 +9 10 +> two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased +Filter column +ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) +Aggregating +Filter column: and(notEquals(y, 0), minus(y, 4)) +0 1 +1 2 +2 3 +4 5 +5 6 +6 7 +7 8 +9 10 +> filter is split, one part is filtered before ARRAY JOIN +Filter column: and(notEquals(y, 2), notEquals(x, 0)) +ARRAY JOIN x +Filter column: notEquals(y, 2) +1 3 +> filter is pushed down before Distinct +Distinct +Distinct +Filter column: notEquals(y, 2) +0 0 +0 1 +1 0 +1 1 +> filter is pushed down before sorting steps +MergingSorted +MergeSorting +PartialSorting +Filter column: and(notEquals(x, 0), notEquals(y, 0)) +1 2 +1 1 +> filter is pushed down before TOTALS HAVING and aggregating +TotalsHaving +Aggregating +Filter column: notEquals(y, 2) +0 12 +1 15 +3 10 + +0 37 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 4f3541f9dde..3148dc4a597 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -4,7 +4,149 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo "sipHash should be calculated after filtration" +$CLICKHOUSE_CLIENT -q "select x + 1 from (select y + 2 as x from (select dummy + 3 as y)) settings query_plan_max_optimizations_to_apply = 1" 2>&1 | + grep -o "Too many optimizations applied to query plan" + +echo "> sipHash should be calculated after filtration" $CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" -echo "sorting steps should know about limit" +echo "> sorting steps should know about limit" $CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Limit 10" + +echo "-- filter push down --" +echo "> filter should be pushed down after aggregating" +$CLICKHOUSE_CLIENT -q " + explain select * from (select sum(x), y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 + settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter" +$CLICKHOUSE_CLIENT -q " + select s, y from (select sum(x) as s, y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 order by s, y + settings enable_optimize_predicate_expression=0" + +echo "> filter should be pushed down after aggregating, column after aggregation is const" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select s, y, y != 0 from (select sum(x) as s, y from ( + select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 + settings enable_optimize_predicate_expression=0" | grep -o "Aggregating\|Filter\|COLUMN Const(UInt8) -> notEquals(y, 0)" +$CLICKHOUSE_CLIENT -q " + select s, y, y != 0 from (select sum(x) as s, y from ( + select number as x, number + 1 as y from numbers(10)) 
group by y + ) where y != 0 order by s, y, y != 0 + settings enable_optimize_predicate_expression=0" + +echo "> one condition of filter should be pushed down after aggregating, other condition is aliased" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 4 order by s, y + settings enable_optimize_predicate_expression=0" + +echo "> one condition of filter should be pushed down after aggregating, other condition is casted" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 4 order by s, y + settings enable_optimize_predicate_expression=0" + +echo "> one condition of filter should be pushed down after aggregating, other two conditions are ANDed" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 8 and s - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s - 8 and s - 4 order by s, y + settings enable_optimize_predicate_expression=0" + +echo "> two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 8 and y - 4 + settings enable_optimize_predicate_expression=0" | + grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(y, 0), minus(y, 4))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" +$CLICKHOUSE_CLIENT -q " + select s, y from ( + select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y + ) where y != 0 and s != 8 and y - 4 order by s, y + settings enable_optimize_predicate_expression=0" + +echo "> filter is split, one part is filtered before ARRAY JOIN" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select range(number) as x, number + 1 as y from numbers(3) + ) array join x where y != 2 and x != 0" | + grep -o "Filter column: and(notEquals(y, 2), notEquals(x, 0))\|ARRAY JOIN x\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select range(number) as x, number + 1 as y from 
numbers(3) + ) array join x where y != 2 and x != 0 order by x, y" + +# echo "> filter is split, one part is filtered before Aggregating and Cube" +# $CLICKHOUSE_CLIENT -q " +# explain actions = 1 select * from ( +# select sum(x) as s, x, y from (select number as x, number + 1 as y from numbers(10)) group by x, y with cube +# ) where y != 0 and s != 4 +# settings enable_optimize_predicate_expression=0" | +# grep -o "Cube\|Aggregating\|Filter column: notEquals(y, 0)" +# $CLICKHOUSE_CLIENT -q " +# select s, x, y from ( +# select sum(x) as s, x, y from (select number as x, number + 1 as y from numbers(10)) group by x, y with cube +# ) where y != 0 and s != 4 order by s, x, y +# settings enable_optimize_predicate_expression=0" + +echo "> filter is pushed down before Distinct" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10)) + ) where y != 2 + settings enable_optimize_predicate_expression=0" | + grep -o "Distinct\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select distinct x, y from (select number % 2 as x, number % 3 as y from numbers(10)) + ) where y != 2 order by x, y + settings enable_optimize_predicate_expression=0" + +echo "> filter is pushed down before sorting steps" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select x, y from ( + select number % 2 as x, number % 3 as y from numbers(6) order by y desc + ) where x != 0 and y != 0 + settings enable_optimize_predicate_expression = 0" | + grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))" +$CLICKHOUSE_CLIENT -q " + select x, y from ( + select number % 2 as x, number % 3 as y from numbers(6) order by y desc + ) where x != 0 and y != 0 + settings enable_optimize_predicate_expression = 0" + +echo "> filter is pushed down before TOTALS HAVING and aggregating" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select * from ( + select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals + ) where y != 2 + settings enable_optimize_predicate_expression=0" | + grep -o "TotalsHaving\|Aggregating\|Filter column: notEquals(y, 2)" +$CLICKHOUSE_CLIENT -q " + select * from ( + select y, sum(x) from (select number as x, number % 4 as y from numbers(10)) group by y with totals + ) where y != 2" diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference index 324890c0a5a..af7feae5a38 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference @@ -1,8 +1,8 @@ -2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50 -2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50 +2 worl 1 1 0 4950 99 [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50 +2 worl 1 1 0 4950 99 [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50 arraySort(used_aggregate_functions) -['avg','count','groupBitAnd','sum','uniq'] +['avg','count','groupBitAnd','max','sum','uniq'] arraySort(used_aggregate_function_combinators) ['Array','If','OrDefault','OrNull'] @@ -11,7 +11,7 @@ arraySort(used_table_functions) ['numbers'] arraySort(used_functions) -['CAST','addDays','array','arrayFlatten','modulo','plus','substring','toDate','toDayOfYear','toTypeName','toWeek'] 
+['CAST','CRC32','addDays','array','arrayFlatten','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek'] arraySort(used_data_type_families) ['Array','Int32','Nullable','String'] @@ -20,5 +20,5 @@ used_database_engines ['Atomic'] arraySort(used_data_type_families) used_storages -['DateTime','Int64'] ['Memory'] +['Int64','datetime'] ['Memory'] diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql index 17657cf60f5..3a890ce16f9 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql @@ -2,6 +2,8 @@ SET database_atomic_wait_for_drop_and_detach_synchronously=1; SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), + POW(1, 2), ROUND(TANh(1)), CrC32(''), + SUM(number), MAX(number), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), @@ -49,7 +51,7 @@ WHERE current_database = currentDatabase() AND type == 'QueryFinish' AND (query ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; -CREATE OR REPLACE TABLE test_query_log_factories_info1.memory_table (id BIGINT, date DateTime) ENGINE=Memory(); +CREATE OR REPLACE TABLE test_query_log_factories_info1.memory_table (id BIGINT, date DATETIME) ENGINE=Memory(); SYSTEM FLUSH LOGS; SELECT arraySort(used_data_type_families), used_storages diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference index 63b3707b9b4..9cca4934551 100644 --- a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference +++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference @@ -2,7 +2,7 @@ Here is CDTATA. This is a white space test. -This is a complex test. world '); -SELECT htmlOrXmlCoarseParse(''); -SELECT htmlOrXmlCoarseParse('This is a white space test.'); -SELECT htmlOrXmlCoarseParse('This is a complex test. Hello, world ]]>world ]]> hello\n]]>hello\n'); +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML('This is a white space test.'); +SELECT extractTextFromHTML('This is a complex test. 
Hello, world ]]>world ]]> hello\n]]>hello\n'); + DROP TABLE IF EXISTS defaults; CREATE TABLE defaults ( @@ -11,5 +12,5 @@ CREATE TABLE defaults INSERT INTO defaults values ('hello, world'), (''), (''), ('white space collapse'); -SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults; +SELECT extractTextFromHTML(stringColumn) FROM defaults; DROP table defaults; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference index 849f8139640..3adc9a17e5c 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -1,19 +1,17 @@ -- { echo } -SELECT toDateTime(-2, 2); +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(-2, 2); +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); 1970-01-01 03:00:00.00 -SELECT CAST(-1 AS DateTime64); -1970-01-01 03:00:00.000 -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64); -2020-01-01 00:00:00.300 -SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2); -2106-02-07 09:28:15.00 -SELECT toDateTime(-2., 2); +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +1970-01-01 03:00:00 +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +2020-01-01 00:00:00 +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(-2., 2); +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2); -2106-02-07 09:28:16.00 -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2); -2106-02-07 09:28:15.00 +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +2106-02-07 09:00:00.00 +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql index f02d45a2cff..92d5a33328f 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql @@ -1,10 +1,10 @@ -- { echo } -SELECT toDateTime(-2, 2); -SELECT toDateTime64(-2, 2); -SELECT CAST(-1 AS DateTime64); -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64); -SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2); -SELECT toDateTime(-2., 2); -SELECT toDateTime64(-2., 2); -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2); -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2); +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01698_fix_toMinute.reference b/tests/queries/0_stateless/01698_fix_toMinute.reference new file mode 100644 index 00000000000..7675aad3a57 --- /dev/null +++ b/tests/queries/0_stateless/01698_fix_toMinute.reference @@ -0,0 +1,24 @@ +Check the bug causing situation: the 
special Australia/Lord_Howe time zone. toDateTime and toString functions are all tested at once +1554559200 2019-04-07 01:00:00 2019-04-07 01:00:00 +1554559800 2019-04-07 01:10:00 2019-04-07 01:10:00 +1554560400 2019-04-07 01:20:00 2019-04-07 01:20:00 +1554561000 2019-04-07 01:30:00 2019-04-07 01:30:00 +1554561600 2019-04-07 01:40:00 2019-04-07 01:40:00 +1554562200 2019-04-07 01:50:00 2019-04-07 01:50:00 +1554562800 2019-04-07 01:30:00 2019-04-07 01:30:00 +1554563400 2019-04-07 01:40:00 2019-04-07 01:40:00 +1554564000 2019-04-07 01:50:00 2019-04-07 01:50:00 +1554564600 2019-04-07 02:00:00 2019-04-07 02:00:00 +1554565200 2019-04-07 02:10:00 2019-04-07 02:10:00 +1554565800 2019-04-07 02:20:00 2019-04-07 02:20:00 +1554566400 2019-04-07 02:30:00 2019-04-07 02:30:00 +1554567000 2019-04-07 02:40:00 2019-04-07 02:40:00 +1554567600 2019-04-07 02:50:00 2019-04-07 02:50:00 +1554568200 2019-04-07 03:00:00 2019-04-07 03:00:00 +1554568800 2019-04-07 03:10:00 2019-04-07 03:10:00 +1554569400 2019-04-07 03:20:00 2019-04-07 03:20:00 +1554570000 2019-04-07 03:30:00 2019-04-07 03:30:00 +1554570600 2019-04-07 03:40:00 2019-04-07 03:40:00 +4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour: +4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour: +4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour: diff --git a/tests/queries/0_stateless/01698_fix_toMinute.sql b/tests/queries/0_stateless/01698_fix_toMinute.sql new file mode 100644 index 00000000000..f582806719d --- /dev/null +++ b/tests/queries/0_stateless/01698_fix_toMinute.sql @@ -0,0 +1,16 @@ +/* toDateTime, toString and any other function that calls toMinute() is affected by this bug; the tests below verify toDateTime and toString. */ +SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zone. toDateTime and toString functions are all tested at once'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, toString(x) as xx FROM numbers(20); + +/* The Batch Part. 
Test period is whole 4 days*/ +SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; + +SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; + +SELECT '4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference index e70c5fa62ee..45f30314f5a 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.reference +++ b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -50,57 +50,29 @@ DST boundary test for Australia/Lord_Howe. 
This is a special timezone with DST o DST boundary test for Australia/Lord_Howe: 0 2020-10-04 01:40:00 37800 1601737800 1 2020-10-04 01:50:00 37800 1601738400 -2 2020-10-04 02:00:00 39600 1601739000 -3 2020-10-04 02:10:00 39600 1601739600 +2 2020-10-04 02:30:00 39600 1601739000 +3 2020-10-04 02:40:00 39600 1601739600 0 2019-04-07 01:00:00 39600 1554559200 1 2019-04-07 01:10:00 39600 1554559800 2 2019-04-07 01:20:00 39600 1554560400 3 2019-04-07 01:30:00 39600 1554561000 4 2019-04-07 01:40:00 39600 1554561600 5 2019-04-07 01:50:00 39600 1554562200 -6 2019-04-07 01:00:00 37800 1554562800 -7 2019-04-07 01:10:00 37800 1554563400 -8 2019-04-07 01:20:00 37800 1554564000 -9 2019-04-07 02:30:00 37800 1554564600 -10 2019-04-07 02:40:00 37800 1554565200 -11 2019-04-07 02:50:00 37800 1554565800 -12 2019-04-07 02:00:00 37800 1554566400 -13 2019-04-07 02:10:00 37800 1554567000 -14 2019-04-07 02:20:00 37800 1554567600 -15 2019-04-07 03:30:00 37800 1554568200 -16 2019-04-07 03:40:00 37800 1554568800 -17 2019-04-07 03:50:00 37800 1554569400 +6 2019-04-07 01:30:00 37800 1554562800 +7 2019-04-07 01:40:00 37800 1554563400 +8 2019-04-07 01:50:00 37800 1554564000 +9 2019-04-07 02:00:00 37800 1554564600 +10 2019-04-07 02:10:00 37800 1554565200 +11 2019-04-07 02:20:00 37800 1554565800 +12 2019-04-07 02:30:00 37800 1554566400 +13 2019-04-07 02:40:00 37800 1554567000 +14 2019-04-07 02:50:00 37800 1554567600 +15 2019-04-07 03:00:00 37800 1554568200 +16 2019-04-07 03:10:00 37800 1554568800 +17 2019-04-07 03:20:00 37800 1554569400 4 days test in batch comparing with manually computation result for Europe/Moscow: 4 days test in batch comparing with manually computation result for Asia/Tehran: -The result maybe wrong for toDateTime processing Australia/Lord_Howe -1601739000 2020-10-04 02:00:00 39600 37800 -1601739600 2020-10-04 02:10:00 39600 37800 -1601740200 2020-10-04 02:20:00 39600 37800 -1601740800 2020-10-04 03:30:00 39600 41400 -1601741400 2020-10-04 03:40:00 39600 41400 -1601742000 2020-10-04 03:50:00 39600 41400 -1601742600 2020-10-04 03:00:00 39600 37800 -1601743200 2020-10-04 03:10:00 39600 37800 -1601743800 2020-10-04 03:20:00 39600 37800 -1601744400 2020-10-04 04:30:00 39600 41400 -1601745000 2020-10-04 04:40:00 39600 41400 -1601745600 2020-10-04 04:50:00 39600 41400 -1601746200 2020-10-04 04:00:00 39600 37800 -1601746800 2020-10-04 04:10:00 39600 37800 -1601747400 2020-10-04 04:20:00 39600 37800 -1601748000 2020-10-04 05:30:00 39600 41400 -1554562800 2019-04-07 01:00:00 37800 36000 -1554563400 2019-04-07 01:10:00 37800 36000 -1554564000 2019-04-07 01:20:00 37800 36000 -1554564600 2019-04-07 02:30:00 37800 39600 -1554565200 2019-04-07 02:40:00 37800 39600 -1554565800 2019-04-07 02:50:00 37800 39600 -1554566400 2019-04-07 02:00:00 37800 36000 -1554567000 2019-04-07 02:10:00 37800 36000 -1554567600 2019-04-07 02:20:00 37800 36000 -1554568200 2019-04-07 03:30:00 37800 39600 -1554568800 2019-04-07 03:40:00 37800 39600 -1554569400 2019-04-07 03:50:00 37800 39600 +4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: 11 1981-06-01 00:00:00 14400 12 1982-06-01 00:00:00 14400 diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql index 1b3f05ecdd7..8cabb23c4de 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.sql +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -26,8 +26,7 @@ SELECT '4 days test in batch comparing with manually computation result for Asia SELECT toUnixTimestamp(x) 
as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -/* During this test we got unexpected result comes from the toDateTime() function when process the special time zone of 'Australia/Lord_Howe', which may be some kind of bugs. */ -SELECT 'The result maybe wrong for toDateTime processing Australia/Lord_Howe'; +SELECT '4 days test in batch comparing with manually computation result for Australia/Lord_Howe'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc; SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc; diff --git a/tests/queries/0_stateless/01715_tuple_insert_null_as_default.reference b/tests/queries/0_stateless/01715_tuple_insert_null_as_default.reference new file mode 100644 index 00000000000..20f0fadfdcf --- /dev/null +++ b/tests/queries/0_stateless/01715_tuple_insert_null_as_default.reference @@ -0,0 +1,15 @@ +Tuple +(0,1) +(0,1) +Tuple nested in Array +[(0,2),(3,0),(0,4)] +[(0,2),(3,0),(0,4)] +Tuple nested in Array nested in Tuple +(0,[(0,2),(3,0),(0,4)]) +(0,[(0,2),(3,0),(0,4)]) +Tuple nested in Map +{'test':(0,1)} +{'test':(0,1)} +Tuple nested in Map nested in Tuple +(0,{'test':(0,1)}) +(0,{'test':(0,1)}) diff --git a/tests/queries/0_stateless/01715_tuple_insert_null_as_default.sql b/tests/queries/0_stateless/01715_tuple_insert_null_as_default.sql new file mode 100644 index 00000000000..d5fd9af22bd --- /dev/null +++ b/tests/queries/0_stateless/01715_tuple_insert_null_as_default.sql @@ -0,0 +1,77 @@ +SELECT 'Tuple'; + +DROP TABLE IF EXISTS test_tuple; +CREATE TABLE test_tuple (value Tuple(UInt8, UInt8)) ENGINE=TinyLog; + +SET input_format_null_as_default = 1; +INSERT INTO test_tuple VALUES ((NULL, 1)); +SELECT * FROM test_tuple; + +SET input_format_null_as_default = 0; +INSERT INTO test_tuple VALUES ((NULL, 2)); -- { clientError 53 } +SELECT * FROM test_tuple; + +DROP TABLE test_tuple; + +SELECT 'Tuple nested in Array'; + +DROP TABLE IF EXISTS test_tuple_nested_in_array; +CREATE TABLE test_tuple_nested_in_array (value Array(Tuple(UInt8, UInt8))) ENGINE=TinyLog; + +SET input_format_null_as_default = 1; +INSERT INTO test_tuple_nested_in_array VALUES ([(NULL, 2), (3, NULL), (NULL, 4)]); +SELECT * FROM test_tuple_nested_in_array; + +SET input_format_null_as_default = 0; +INSERT INTO test_tuple_nested_in_array VALUES ([(NULL, 1)]); -- { clientError 53 } +SELECT * FROM test_tuple_nested_in_array; + +DROP TABLE test_tuple_nested_in_array; + +SELECT 'Tuple nested in Array nested in Tuple'; + +DROP TABLE IF EXISTS test_tuple_nested_in_array_nested_in_tuple; +CREATE TABLE test_tuple_nested_in_array_nested_in_tuple (value Tuple(UInt8, Array(Tuple(UInt8, UInt8)))) ENGINE=TinyLog; + +SET input_format_null_as_default = 1; +INSERT INTO test_tuple_nested_in_array_nested_in_tuple VALUES ( (NULL, [(NULL, 2), (3, NULL), (NULL, 4)]) ); +SELECT * FROM 
test_tuple_nested_in_array_nested_in_tuple; + +SET input_format_null_as_default = 0; +INSERT INTO test_tuple_nested_in_array_nested_in_tuple VALUES ( (NULL, [(NULL, 1)]) ); -- { clientError 53 } +SELECT * FROM test_tuple_nested_in_array_nested_in_tuple; + +DROP TABLE test_tuple_nested_in_array_nested_in_tuple; + +SELECT 'Tuple nested in Map'; + +SET allow_experimental_map_type = 1; + +DROP TABLE IF EXISTS test_tuple_nested_in_map; +CREATE TABLE test_tuple_nested_in_map (value Map(String, Tuple(UInt8, UInt8))) ENGINE=TinyLog; + +SET input_format_null_as_default = 1; +INSERT INTO test_tuple_nested_in_map VALUES (map('test', (NULL, 1))); + +SELECT * FROM test_tuple_nested_in_map; + +SET input_format_null_as_default = 0; +INSERT INTO test_tuple_nested_in_map VALUES (map('test', (NULL, 1))); -- { clientError 53 } +SELECT * FROM test_tuple_nested_in_map; + +DROP TABLE test_tuple_nested_in_map; + +SELECT 'Tuple nested in Map nested in Tuple'; + +DROP TABLE IF EXISTS test_tuple_nested_in_map_nested_in_tuple; +CREATE TABLE test_tuple_nested_in_map_nested_in_tuple (value Tuple(UInt8, Map(String, Tuple(UInt8, UInt8)))) ENGINE=TinyLog; + +SET input_format_null_as_default = 1; +INSERT INTO test_tuple_nested_in_map_nested_in_tuple VALUES ( (NULL, map('test', (NULL, 1))) ); +SELECT * FROM test_tuple_nested_in_map_nested_in_tuple; + +SET input_format_null_as_default = 0; +INSERT INTO test_tuple_nested_in_map_nested_in_tuple VALUES ( (NULL, map('test', (NULL, 1))) ); -- { clientError 53 } +SELECT * FROM test_tuple_nested_in_map_nested_in_tuple; + +DROP TABLE test_tuple_nested_in_map_nested_in_tuple; diff --git a/tests/queries/0_stateless/01720_constraints_complex_types.reference b/tests/queries/0_stateless/01720_constraints_complex_types.reference new file mode 100644 index 00000000000..01e79c32a8c --- /dev/null +++ b/tests/queries/0_stateless/01720_constraints_complex_types.reference @@ -0,0 +1,3 @@ +1 +2 +3 diff --git a/tests/queries/0_stateless/01720_constraints_complex_types.sql b/tests/queries/0_stateless/01720_constraints_complex_types.sql new file mode 100644 index 00000000000..273f509b6eb --- /dev/null +++ b/tests/queries/0_stateless/01720_constraints_complex_types.sql @@ -0,0 +1,47 @@ +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS constraint_on_nullable_type; +CREATE TABLE constraint_on_nullable_type +( + `id` Nullable(UInt64), + CONSTRAINT `c0` CHECK `id` = 1 +) +ENGINE = TinyLog(); + +INSERT INTO constraint_on_nullable_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_nullable_type VALUES (1); + +SELECT * FROM constraint_on_nullable_type; + +DROP TABLE constraint_on_nullable_type; + +DROP TABLE IF EXISTS constraint_on_low_cardinality_type; +CREATE TABLE constraint_on_low_cardinality_type +( + `id` LowCardinality(UInt64), + CONSTRAINT `c0` CHECK `id` = 2 +) +ENGINE = TinyLog; + +INSERT INTO constraint_on_low_cardinality_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_low_cardinality_type VALUES (2); + +SELECT * FROM constraint_on_low_cardinality_type; + +DROP TABLE constraint_on_low_cardinality_type; + +DROP TABLE IF EXISTS constraint_on_low_cardinality_nullable_type; + +CREATE TABLE constraint_on_low_cardinality_nullable_type +( + `id` LowCardinality(Nullable(UInt64)), + CONSTRAINT `c0` CHECK `id` = 3 +) +ENGINE = TinyLog; + +INSERT INTO constraint_on_low_cardinality_nullable_type VALUES (0); -- {serverError 469} +INSERT INTO constraint_on_low_cardinality_nullable_type VALUES (3); + +SELECT * FROM 
constraint_on_low_cardinality_nullable_type; + +DROP TABLE constraint_on_low_cardinality_nullable_type; diff --git a/tests/queries/0_stateless/01721_constraints_constant_expressions.reference b/tests/queries/0_stateless/01721_constraints_constant_expressions.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01721_constraints_constant_expressions.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01721_constraints_constant_expressions.sql b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql new file mode 100644 index 00000000000..d70c0cd4dc0 --- /dev/null +++ b/tests/queries/0_stateless/01721_constraints_constant_expressions.sql @@ -0,0 +1,40 @@ +DROP TABLE IF EXISTS constraint_constant_number_expression; +CREATE TABLE constraint_constant_number_expression +( + id UInt64, + CONSTRAINT `c0` CHECK 1, + CONSTRAINT `c1` CHECK 1 < 2, + CONSTRAINT `c2` CHECK isNull(cast(NULL, 'Nullable(UInt8)')) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_number_expression VALUES (1); + +SELECT * FROM constraint_constant_number_expression; + +DROP TABLE constraint_constant_number_expression; + +DROP TABLE IF EXISTS constraint_constant_number_expression_non_uint8; +CREATE TABLE constraint_constant_number_expression_non_uint8 +( + id UInt64, + CONSTRAINT `c0` CHECK toUInt64(1) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_number_expression_non_uint8 VALUES (2); -- {serverError 1} + +SELECT * FROM constraint_constant_number_expression_non_uint8; + +DROP TABLE constraint_constant_number_expression_non_uint8; + +DROP TABLE IF EXISTS constraint_constant_nullable_expression_that_contains_null; +CREATE TABLE constraint_constant_nullable_expression_that_contains_null +( + id UInt64, + CONSTRAINT `c0` CHECK nullIf(1 % 2, 1) +) ENGINE = TinyLog(); + +INSERT INTO constraint_constant_nullable_expression_that_contains_null VALUES (3); -- {serverError 469} + +SELECT * FROM constraint_constant_nullable_expression_that_contains_null; + +DROP TABLE constraint_constant_nullable_expression_that_contains_null; diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh new file mode 100755 index 00000000000..a187d778fdb --- /dev/null +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01732_alters_bad_conversions.reference b/tests/queries/0_stateless/01732_alters_bad_conversions.reference new file mode 100644 index 00000000000..5f570c78579 --- /dev/null +++ b/tests/queries/0_stateless/01732_alters_bad_conversions.reference @@ -0,0 +1,4 @@ +CREATE TABLE default.bad_conversions\n(\n `a` UInt32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +0 +CREATE TABLE default.bad_conversions_2\n(\n `e` Enum8(\'foo\' = 1, \'bar\' = 2)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +0 diff --git a/tests/queries/0_stateless/01732_alters_bad_conversions.sql b/tests/queries/0_stateless/01732_alters_bad_conversions.sql new file mode 100644 index 00000000000..27da5242368 --- /dev/null +++ b/tests/queries/0_stateless/01732_alters_bad_conversions.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS bad_conversions; +DROP TABLE IF EXISTS bad_conversions_2; + +CREATE TABLE bad_conversions (a UInt32) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO bad_conversions VALUES (1); +ALTER TABLE bad_conversions MODIFY COLUMN a Array(String); -- { serverError 53 } +SHOW CREATE TABLE bad_conversions; +SELECT count() FROM system.mutations WHERE table = 'bad_conversions' AND database = currentDatabase(); + +CREATE TABLE bad_conversions_2 (e Enum('foo' = 1, 'bar' = 2)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO bad_conversions_2 VALUES (1); +ALTER TABLE bad_conversions_2 MODIFY COLUMN e Enum('bar' = 1, 'foo' = 2); -- { serverError 70 } +SHOW CREATE TABLE bad_conversions_2; +SELECT count() FROM system.mutations WHERE table = 'bad_conversions_2' AND database = currentDatabase(); + +DROP TABLE IF EXISTS bad_conversions; +DROP TABLE IF EXISTS bad_conversions_2; diff --git a/tests/queries/0_stateless/01732_explain_syntax_union_query.reference b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference new file mode 100644 index 00000000000..fe5eb01a7ed --- /dev/null +++ b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference @@ -0,0 +1,66 @@ +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 + +SELECT 1 +UNION ALL +( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 +) +UNION ALL +SELECT 1 + +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 + ) + UNION ALL + SELECT 1 +) + +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + SELECT 1 + UNION ALL + SELECT 1 +) + +SELECT 1 +UNION DISTINCT +SELECT 1 +UNION DISTINCT +SELECT 1 + +SELECT 1 + + +( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 +) +UNION ALL +SELECT 1 diff --git a/tests/queries/0_stateless/01732_explain_syntax_union_query.sql b/tests/queries/0_stateless/01732_explain_syntax_union_query.sql new file mode 100644 index 00000000000..0dd1e19e765 --- /dev/null +++ b/tests/queries/0_stateless/01732_explain_syntax_union_query.sql @@ -0,0 +1,86 @@ +EXPLAIN SYNTAX +SELECT 1 +UNION ALL +( + SELECT 1 + UNION ALL + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT 1 
+UNION ALL +( + SELECT 1 + UNION DISTINCT + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION DISTINCT + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 + ) +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION ALL + SELECT 1 + ) +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT 1 +UNION ALL +SELECT 1 +UNION DISTINCT +SELECT 1; + +SELECT ' '; + +EXPLAIN SYNTAX +(((((((((((((((SELECT 1))))))))))))))); + +SELECT ' '; + +EXPLAIN SYNTAX +(((((((((((((((SELECT 1 UNION DISTINCT SELECT 1))) UNION DISTINCT SELECT 1)))) UNION ALL SELECT 1)))))))); diff --git a/tests/queries/0_stateless/01732_race_condition_storage_join_long.reference b/tests/queries/0_stateless/01732_race_condition_storage_join_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh new file mode 100755 index 00000000000..b7dd76760d4 --- /dev/null +++ b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +unset CLICKHOUSE_LOG_COMMENT + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -o errexit +set -o pipefail + +echo " + DROP TABLE IF EXISTS storage_join_race; + CREATE TABLE storage_join_race (x UInt64, y UInt64) Engine = Join(ALL, FULL, x); +" | $CLICKHOUSE_CLIENT -n + +function read_thread_big() +{ + while true; do + echo " + SELECT * FROM ( SELECT number AS x FROM numbers(100000) ) AS t1 ALL FULL JOIN storage_join_race USING (x) FORMAT Null; + " | $CLICKHOUSE_CLIENT -n + done +} + +function read_thread_small() +{ + while true; do + echo " + SELECT * FROM ( SELECT number AS x FROM numbers(10) ) AS t1 ALL FULL JOIN storage_join_race USING (x) FORMAT Null; + " | $CLICKHOUSE_CLIENT -n + done +} + +function read_thread_select() +{ + while true; do + echo " + SELECT * FROM storage_join_race FORMAT Null; + " | $CLICKHOUSE_CLIENT -n + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f read_thread_big; +export -f read_thread_small; +export -f read_thread_select; + +TIMEOUT=20 + +timeout $TIMEOUT bash -c read_thread_big 2> /dev/null & +timeout $TIMEOUT bash -c read_thread_small 2> /dev/null & +timeout $TIMEOUT bash -c read_thread_select 2> /dev/null & + +echo " + INSERT INTO storage_join_race SELECT number AS x, number AS y FROM numbers (10000000); +" | $CLICKHOUSE_CLIENT -n + +wait + +$CLICKHOUSE_CLIENT -q "DROP TABLE storage_join_race" diff --git a/tests/queries/0_stateless/01735_join_get_low_card_fix.reference b/tests/queries/0_stateless/01735_join_get_low_card_fix.reference new file mode 100644 index 00000000000..0b20aead00e --- /dev/null +++ b/tests/queries/0_stateless/01735_join_get_low_card_fix.reference @@ -0,0 +1 @@ +yyy diff --git a/tests/queries/0_stateless/01735_join_get_low_card_fix.sql b/tests/queries/0_stateless/01735_join_get_low_card_fix.sql new file mode 100644 index 00000000000..bdc979bc11e --- /dev/null +++ b/tests/queries/0_stateless/01735_join_get_low_card_fix.sql @@ -0,0 +1,9 @@ +drop table if exists join_tbl; + +create table join_tbl (`id` String, `name` String) engine Join(any, left, id); + +insert into join_tbl values ('xxx', 'yyy'); + +select joinGet('join_tbl', 'name', 
toLowCardinality('xxx')); + +drop table if exists join_tbl; diff --git a/tests/queries/0_stateless/01735_to_datetime64.reference b/tests/queries/0_stateless/01735_to_datetime64.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01735_to_datetime64.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01735_to_datetime64.sql b/tests/queries/0_stateless/01735_to_datetime64.sql new file mode 100644 index 00000000000..fe4eb521148 --- /dev/null +++ b/tests/queries/0_stateless/01735_to_datetime64.sql @@ -0,0 +1 @@ +SELECT toDate(toDateTime64(today(), 0, 'UTC')) = toDate(toDateTime(today(), 'UTC')); diff --git a/tests/queries/0_stateless/01736_null_as_default.reference b/tests/queries/0_stateless/01736_null_as_default.reference new file mode 100644 index 00000000000..baf83eb21d7 --- /dev/null +++ b/tests/queries/0_stateless/01736_null_as_default.reference @@ -0,0 +1,2 @@ +A +\N diff --git a/tests/queries/0_stateless/01736_null_as_default.sql b/tests/queries/0_stateless/01736_null_as_default.sql new file mode 100644 index 00000000000..f9a4bc69acf --- /dev/null +++ b/tests/queries/0_stateless/01736_null_as_default.sql @@ -0,0 +1,5 @@ +drop table if exists test_enum; +create table test_enum (c Nullable(Enum16('A' = 1, 'B' = 2))) engine Log; +insert into test_enum values (1), (NULL); +select * from test_enum; +drop table if exists test_enum; diff --git a/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.reference b/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.reference new file mode 100644 index 00000000000..c5143f7c4e9 --- /dev/null +++ b/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.reference @@ -0,0 +1,3 @@ +{1:'Ready',2:'Steady',3:'Go'} +{1:'Ready',2:'Steady',3:'Go'} +{1:'Ready',2:'Steady',3:'Go'} diff --git a/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.sql b/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.sql new file mode 100644 index 00000000000..c5c50c5b039 --- /dev/null +++ b/tests/queries/0_stateless/01744_tuple_cast_to_map_bugfix.sql @@ -0,0 +1,3 @@ +SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; +SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; +SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; diff --git a/tests/queries/0_stateless/01745_alter_delete_view.reference b/tests/queries/0_stateless/01745_alter_delete_view.reference new file mode 100644 index 00000000000..dc3ab50ab0d --- /dev/null +++ b/tests/queries/0_stateless/01745_alter_delete_view.reference @@ -0,0 +1,4 @@ +1 1 +2 1 +1 1 +2 1 diff --git a/tests/queries/0_stateless/01745_alter_delete_view.sql b/tests/queries/0_stateless/01745_alter_delete_view.sql new file mode 100644 index 00000000000..c242f1be63e --- /dev/null +++ b/tests/queries/0_stateless/01745_alter_delete_view.sql @@ -0,0 +1,28 @@ +DROP VIEW IF EXISTS test_view; +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + f1 Int32, + f2 Int32, + pk Int32 +) +ENGINE = MergeTree() +ORDER BY f1 +PARTITION BY pk; + +CREATE VIEW test_view AS +SELECT f1, f2 +FROM test_table +WHERE pk = 2; + +INSERT INTO test_table (f1, f2, pk) VALUES (1,1,1), (1,1,2), (2,1,1), (2,1,2); + +SELECT * FROM test_view ORDER BY f1, f2; + +ALTER TABLE test_view DELETE WHERE pk = 2; --{serverError 48} + +SELECT * FROM test_view ORDER BY f1, f2; + +DROP VIEW IF EXISTS test_view; +DROP TABLE IF EXISTS test_table; diff --git
a/tests/queries/0_stateless/01746_extract_text_from_html.reference b/tests/queries/0_stateless/01746_extract_text_from_html.reference new file mode 100644 index 00000000000..ee05e085ba4 --- /dev/null +++ b/tests/queries/0_stateless/01746_extract_text_from_html.reference @@ -0,0 +1,120 @@ +-- { echo } + +SELECT extractTextFromHTML(''); + +SELECT extractTextFromHTML(' '); + +SELECT extractTextFromHTML(' '); + +SELECT extractTextFromHTML('Hello'); +Hello +SELECT extractTextFromHTML('Hello, world'); +Hello, world +SELECT extractTextFromHTML('Hello, world'); +Hello, world +SELECT extractTextFromHTML(' Hello, world'); +Hello, world +SELECT extractTextFromHTML(' Hello, world '); +Hello, world +SELECT extractTextFromHTML(' \t Hello,\rworld \n '); +Hello, world +SELECT extractTextFromHTML('Hello world'); +Hello > world +SELECT extractTextFromHTML('Hello'); +Hello +SELECT extractTextFromHTML('Hello<>world'); +Hello world +SELECT extractTextFromHTML('Helloworld'); +Hello world +SELECT extractTextFromHTML('Helloworld'); +Hello world +SELECT extractTextFromHTML('Helloworld'); +Hello world +SELECT extractTextFromHTML('Helloworld'); +Hello world +SELECT extractTextFromHTML('Helloworld'); +Hello world +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('HelloWorld'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML('HelloWorld'); +Hello World +SELECT extractTextFromHTML('Hello World'); +Hello World +SELECT extractTextFromHTML(''); + \t Hello,\rworld \n +SELECT extractTextFromHTML('Hello world!'); +HelloHello\tworld world! +SELECT extractTextFromHTML('Helloworld!'); +HelloHello\tworld world! +SELECT extractTextFromHTML('Hello world]]> world!'); +HelloHello world world! +SELECT extractTextFromHTML('John Smith]]>'); +John Smith +SELECT extractTextFromHTML('John ]]>'); +John +SELECT extractTextFromHTML('John Smith]]>'); +John +SELECT extractTextFromHTML('John ]]>]]>'); +John Smith +SELECT extractTextFromHTML('John ]]> ]]>'); +John Smith +SELECT extractTextFromHTML('John]]> ]]>'); +JohnSmith +SELECT extractTextFromHTML('John ]]>]]>]]>'); +John ]]>Smith +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +Hello World goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +Hello World goodbye +SELECT extractTextFromHTML('HelloWorld goodbye'); +Hello World goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello]]> goodbye'); +Hello +SELECT extractTextFromHTML('Hello]]> goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello]]> goodbye'); +Hello ]]> goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML('Hello goodbye'); +Hello goodbye +SELECT extractTextFromHTML(']]>'); +]]> +SELECT extractTextFromHTML(' + +
xkcd.com + +'); +xkcd.com diff --git a/tests/queries/0_stateless/01746_extract_text_from_html.sql b/tests/queries/0_stateless/01746_extract_text_from_html.sql new file mode 100644 index 00000000000..b4ccc775bef --- /dev/null +++ b/tests/queries/0_stateless/01746_extract_text_from_html.sql @@ -0,0 +1,72 @@ +-- { echo } + +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML(' '); +SELECT extractTextFromHTML(' '); +SELECT extractTextFromHTML('Hello'); +SELECT extractTextFromHTML('Hello, world'); +SELECT extractTextFromHTML('Hello, world'); +SELECT extractTextFromHTML(' Hello, world'); +SELECT extractTextFromHTML(' Hello, world '); +SELECT extractTextFromHTML(' \t Hello,\rworld \n '); + +SELECT extractTextFromHTML('Hello world'); +SELECT extractTextFromHTML('Hello'); +SELECT extractTextFromHTML('Hello<>world'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); +SELECT extractTextFromHTML('Helloworld'); + +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('HelloWorld'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('Hello World'); +SELECT extractTextFromHTML('HelloWorld'); +SELECT extractTextFromHTML('Hello World'); + +SELECT extractTextFromHTML(''); +SELECT extractTextFromHTML('Hello world!'); +SELECT extractTextFromHTML('Helloworld!'); + +SELECT extractTextFromHTML('Hello world]]> world!'); +SELECT extractTextFromHTML('John Smith
]]>'); +SELECT extractTextFromHTML('John ]]>'); +SELECT extractTextFromHTML('John Smith]]>'); +SELECT extractTextFromHTML('John ]]>]]>'); +SELECT extractTextFromHTML('John ]]> ]]>'); +SELECT extractTextFromHTML('John]]> ]]>'); +SELECT extractTextFromHTML('John ]]>]]>]]>'); + +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); +SELECT extractTextFromHTML('HelloWorld goodbye'); + +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello]]> goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); +SELECT extractTextFromHTML('Hello goodbye'); + +SELECT extractTextFromHTML(']]>'); + +SELECT extractTextFromHTML(' + +
xkcd.com + +'); diff --git a/tests/queries/0_stateless/01746_forbid_drop_column_referenced_by_mv.reference b/tests/queries/0_stateless/01746_forbid_drop_column_referenced_by_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01746_forbid_drop_column_referenced_by_mv.sql b/tests/queries/0_stateless/01746_forbid_drop_column_referenced_by_mv.sql new file mode 100644 index 00000000000..f084cae7780 --- /dev/null +++ b/tests/queries/0_stateless/01746_forbid_drop_column_referenced_by_mv.sql @@ -0,0 +1,172 @@ +-- MergeTree +DROP TABLE IF EXISTS `01746_merge_tree`; +CREATE TABLE `01746_merge_tree` +( + `n1` Int8, + `n2` Int8, + `n3` Int8, + `n4` Int8 +) +ENGINE = MergeTree +ORDER BY n1; + +DROP TABLE IF EXISTS `01746_merge_tree_mv`; +CREATE MATERIALIZED VIEW `01746_merge_tree_mv` +ENGINE = Memory AS +SELECT + n2, + n3 +FROM `01746_merge_tree`; + +ALTER TABLE `01746_merge_tree` + DROP COLUMN n3; -- { serverError 524 } + +ALTER TABLE `01746_merge_tree` + DROP COLUMN n2; -- { serverError 524 } + +-- ok +ALTER TABLE `01746_merge_tree` + DROP COLUMN n4; + +DROP TABLE `01746_merge_tree`; +DROP TABLE `01746_merge_tree_mv`; + +-- Null +DROP TABLE IF EXISTS `01746_null`; +CREATE TABLE `01746_null` +( + `n1` Int8, + `n2` Int8, + `n3` Int8 +) +ENGINE = Null; + +DROP TABLE IF EXISTS `01746_null_mv`; +CREATE MATERIALIZED VIEW `01746_null_mv` +ENGINE = Memory AS +SELECT + n1, + n2 +FROM `01746_null`; + +ALTER TABLE `01746_null` + DROP COLUMN n1; -- { serverError 524 } + +ALTER TABLE `01746_null` + DROP COLUMN n2; -- { serverError 524 } + +-- ok +ALTER TABLE `01746_null` + DROP COLUMN n3; + +DROP TABLE `01746_null`; +DROP TABLE `01746_null_mv`; + +-- Distributed + +DROP TABLE IF EXISTS `01746_local`; +CREATE TABLE `01746_local` +( + `n1` Int8, + `n2` Int8, + `n3` Int8 +) +ENGINE = Memory; + +DROP TABLE IF EXISTS `01746_dist`; +CREATE TABLE `01746_dist` AS `01746_local` +ENGINE = Distributed('test_shard_localhost', currentDatabase(), `01746_local`, rand()); + +DROP TABLE IF EXISTS `01746_dist_mv`; +CREATE MATERIALIZED VIEW `01746_dist_mv` +ENGINE = Memory AS +SELECT + n1, + n2 +FROM `01746_dist`; + +ALTER TABLE `01746_dist` + DROP COLUMN n1; -- { serverError 524 } + +ALTER TABLE `01746_dist` + DROP COLUMN n2; -- { serverError 524 } + +-- ok +ALTER TABLE `01746_dist` + DROP COLUMN n3; + +DROP TABLE `01746_local`; +DROP TABLE `01746_dist`; +DROP TABLE `01746_dist_mv`; + +-- Merge +DROP TABLE IF EXISTS `01746_merge_t`; +CREATE TABLE `01746_merge_t` +( + `n1` Int8, + `n2` Int8, + `n3` Int8 +) +ENGINE = Memory; + +DROP TABLE IF EXISTS `01746_merge`; +CREATE TABLE `01746_merge` AS `01746_merge_t` +ENGINE = Merge(currentDatabase(), '01746_merge_t'); + +DROP TABLE IF EXISTS `01746_merge_mv`; +CREATE MATERIALIZED VIEW `01746_merge_mv` +ENGINE = Memory AS +SELECT + n1, + n2 +FROM `01746_merge`; + +ALTER TABLE `01746_merge` + DROP COLUMN n1; -- { serverError 524 } + +ALTER TABLE `01746_merge` + DROP COLUMN n2; -- { serverError 524 } + +-- ok +ALTER TABLE `01746_merge` + DROP COLUMN n3; + +DROP TABLE `01746_merge_t`; +DROP TABLE `01746_merge`; +DROP TABLE `01746_merge_mv`; + +-- Buffer +DROP TABLE IF EXISTS `01746_buffer_t`; +CREATE TABLE `01746_buffer_t` +( + `n1` Int8, + `n2` Int8, + `n3` Int8 +) +ENGINE = Memory; + +DROP TABLE IF EXISTS `01746_buffer`; +CREATE TABLE `01746_buffer` AS `01746_buffer_t` +ENGINE = Buffer(currentDatabase(), `01746_buffer_t`, 16, 10, 100, 10000, 1000000, 10000000, 100000000); + +DROP TABLE IF EXISTS `01746_buffer_mv`; +CREATE MATERIALIZED VIEW 
`01746_buffer_mv` +ENGINE = Memory AS +SELECT + n1, + n2 +FROM `01746_buffer`; + +ALTER TABLE `01746_buffer` + DROP COLUMN n1; -- { serverError 524 } + +ALTER TABLE `01746_buffer` + DROP COLUMN n2; -- { serverError 524 } + +-- ok +ALTER TABLE `01746_buffer` + DROP COLUMN n3; + +DROP TABLE `01746_buffer_t`; +DROP TABLE `01746_buffer`; +DROP TABLE `01746_buffer_mv`; diff --git a/tests/queries/0_stateless/01746_lc_values_format_bug.reference b/tests/queries/0_stateless/01746_lc_values_format_bug.reference new file mode 100644 index 00000000000..78981922613 --- /dev/null +++ b/tests/queries/0_stateless/01746_lc_values_format_bug.reference @@ -0,0 +1 @@ +a diff --git a/tests/queries/0_stateless/01746_lc_values_format_bug.sql b/tests/queries/0_stateless/01746_lc_values_format_bug.sql new file mode 100644 index 00000000000..6717b9ae5e3 --- /dev/null +++ b/tests/queries/0_stateless/01746_lc_values_format_bug.sql @@ -0,0 +1,14 @@ +drop table if exists lc_test; + +CREATE TABLE lc_test +( + `id` LowCardinality(String) +) +ENGINE = MergeTree +PARTITION BY tuple() +ORDER BY id; + +insert into lc_test values (toString('a')); + +select id from lc_test; +drop table if exists lc_test; diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh new file mode 100755 index 00000000000..e663b329660 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference new file mode 100644 index 00000000000..7c089a2fd05 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.reference @@ -0,0 +1,23 @@ + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999998" + }, + { + "datetime": "2020-12-12", + "pipeline": "test-pipeline", + "host": "clickhouse-test-host-001.clickhouse.com", + "home": "clickhouse", + "detail": "clickhouse", + "row_number": "999999" + } + ], + + "rows": 1000000, + + "rows_before_limit_at_least": 1048080, + diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh new file mode 100755 index 00000000000..f520a21e9b3 --- /dev/null +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | zstd -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference new file mode 100644 index 00000000000..02359f0f98b --- /dev/null +++ b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference @@ -0,0 +1,6 @@ +IU lada 2101 1970-04-19 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +IU lada 2101 1970-04-19 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 diff --git a/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql new file mode 100644 index 00000000000..759c8ba3a0b --- /dev/null +++ b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS report; + +CREATE TABLE report +( + `product` Enum8('IU' = 1, 'WS' = 2), + `machine` String, + `branch` String, + `generated_time` DateTime +) +ENGINE = MergeTree +PARTITION BY (product, toYYYYMM(generated_time)) +ORDER BY (product, machine, branch, generated_time); + +INSERT INTO report VALUES ('IU', 'lada', '2101', toDateTime('1970-04-19 15:00:00')); + +SELECT * FROM report WHERE product = 'IU'; + +ALTER TABLE report MODIFY COLUMN product Enum8('IU' = 1, 'WS' = 2, 'PS' = 3); + +SELECT * FROM report WHERE product = 'PS'; + +INSERT INTO report VALUES ('PS', 
'jeep', 'Grand Cherokee', toDateTime('2005-10-03 15:00:00')); + +SELECT * FROM report WHERE product = 'PS'; + +DETACH TABLE report; +ATTACH TABLE report; + +SELECT * FROM report WHERE product = 'PS'; + +DROP TABLE IF EXISTS report; + +DROP TABLE IF EXISTS replicated_report; + +CREATE TABLE replicated_report +( + `product` Enum8('IU' = 1, 'WS' = 2), + `machine` String, + `branch` String, + `generated_time` DateTime +) +ENGINE = ReplicatedMergeTree('/clickhouse/01747_alter_partition_key/t', '1') +PARTITION BY (product, toYYYYMM(generated_time)) +ORDER BY (product, machine, branch, generated_time); + +INSERT INTO replicated_report VALUES ('IU', 'lada', '2101', toDateTime('1970-04-19 15:00:00')); + +SELECT * FROM replicated_report WHERE product = 'IU'; + +ALTER TABLE replicated_report MODIFY COLUMN product Enum8('IU' = 1, 'WS' = 2, 'PS' = 3) SETTINGS replication_alter_partitions_sync=2; + +SELECT * FROM replicated_report WHERE product = 'PS'; + +INSERT INTO replicated_report VALUES ('PS', 'jeep', 'Grand Cherokee', toDateTime('2005-10-03 15:00:00')); + +SELECT * FROM replicated_report WHERE product = 'PS'; + +DETACH TABLE replicated_report; +ATTACH TABLE replicated_report; + +SELECT * FROM replicated_report WHERE product = 'PS'; + +DROP TABLE IF EXISTS replicated_report; diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.reference b/tests/queries/0_stateless/01747_join_view_filter_dictionary.reference new file mode 100644 index 00000000000..24ed2375adf --- /dev/null +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.reference @@ -0,0 +1,2 @@ +name test 33 1.2 +33 diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql new file mode 100644 index 00000000000..b43fa336485 --- /dev/null +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -0,0 +1,49 @@ +drop table if exists summing_table01747; +drop view if exists rates01747; +drop view if exists agg_view01747; +drop table if exists dictst01747; +drop DICTIONARY if exists default.dict01747; + +CREATE TABLE summing_table01747 + ( + some_name String, + user_id UInt64, + amount Int64, + currency String + ) +ENGINE = SummingMergeTree() +ORDER BY (some_name); + +CREATE VIEW rates01747 AS + SELECT 'USD' as from_currency, 'EUR' as to_currency, 1.2 as rates01747; + +insert into summing_table01747 values ('name', 2, 20, 'USD'),('name', 1, 10, 'USD'); + +create table dictst01747(some_name String, field1 String, field2 UInt8) Engine = Memory +as select 'name', 'test', 33; + +CREATE DICTIONARY default.dict01747 (some_name String, field1 String, field2 UInt8) +PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 +TABLE dictst01747 DB currentDatabase() USER 'default')) +LIFETIME(MIN 0 MAX 0) LAYOUT(COMPLEX_KEY_HASHED()); + + +CREATE VIEW agg_view01747 AS + SELECT + summing_table01747.some_name as some_name, + dictGet('default.dict01747', 'field1', tuple(some_name)) as field1, + dictGet('default.dict01747', 'field2', tuple(some_name)) as field2, + rates01747.rates01747 as rates01747 + FROM summing_table01747 + ANY LEFT JOIN rates01747 + ON rates01747.from_currency = summing_table01747.currency; + +select * from agg_view01747; + +SELECT field2 FROM agg_view01747 WHERE field1 = 'test'; + +drop table summing_table01747; +drop view rates01747; +drop view agg_view01747; +drop table dictst01747; +drop DICTIONARY default.dict01747; diff --git a/tests/queries/0_stateless/01747_transform_empty_arrays.reference 
b/tests/queries/0_stateless/01747_transform_empty_arrays.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01747_transform_empty_arrays.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01747_transform_empty_arrays.sql b/tests/queries/0_stateless/01747_transform_empty_arrays.sql new file mode 100644 index 00000000000..cc9a00a4555 --- /dev/null +++ b/tests/queries/0_stateless/01747_transform_empty_arrays.sql @@ -0,0 +1,27 @@ +SELECT * +FROM +( + WITH + + ( + SELECT groupArray(a) + FROM + ( + SELECT 1 AS a + ) + ) AS keys, + + ( + SELECT groupArray(a) + FROM + ( + SELECT 2 AS a + ) + ) AS values + SELECT * + FROM + ( + SELECT 1 AS a + ) + WHERE transform(a, keys, values, 0) +) AS wrap; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 4e523545938..a823ba38c1c 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -207,3 +207,4 @@ 01702_bitmap_native_integers 01686_event_time_microseconds_part_log 01017_uniqCombined_memory_usage +01747_join_view_filter_dictionary diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_array_3dim.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_array_3dim.proto diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_array_of_arrays.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_array_of_arrays.proto diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_enum_mapping.proto diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_map.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_map.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_map.proto diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_nested_in_nested.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_nested_in_nested.proto new file mode 100644 index 00000000000..f9617572f94 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_nested_in_nested.proto @@ -0,0 +1,11 @@ +syntax = "proto3"; + +message MessageType { + message XType { + message YType { + int32 z=3; + }; + repeated YType y=2; + }; + repeated XType x=1; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_nested_optional.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_nested_optional.proto diff --git 
a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_no_length_delimiter.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_no_length_delimiter.proto new file mode 100644 index 00000000000..9726d0dede1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_no_length_delimiter.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message Message { + int32 x = 1; + string str = 2; +}; diff --git a/tests/queries/0_stateless/00825_protobuf_format.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_persons.proto similarity index 97% rename from tests/queries/0_stateless/00825_protobuf_format.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_persons.proto index 0d9bdd83ccd..b588619f488 100644 --- a/tests/queries/0_stateless/00825_protobuf_format.proto +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_persons.proto @@ -143,9 +143,3 @@ message StrPerson { MeasureUnits measureUnits = 21; NestinessA nestiness_a = 22; }; - -message NumberAndSquare -{ - uint32 number = 1; - uint64 square = 2; -}; diff --git a/tests/queries/0_stateless/00825_protobuf_format_syntax2.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_persons_syntax2.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_syntax2.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_persons_syntax2.proto diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_squares.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_squares.proto new file mode 100644 index 00000000000..bca321568f3 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_squares.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message NumberAndSquare { + uint32 number = 1; + uint64 square = 2; +}; diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_syntax2.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_syntax2.proto new file mode 100644 index 00000000000..d4abea38d4e --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_syntax2.proto @@ -0,0 +1,63 @@ +syntax = "proto2"; + +message Syntax2Person { + enum Gender { + female = 0; + male = 1; + }; + + enum ZodiacSign { + aries = 0; + taurus = 1; + gemini = 2; + cancer = 3; + leo = 4; + virgo = 5; + libra = 6; + scorpius = 7; + sagittarius = 8; + capricorn = 9; + aquarius = 10; + pisces = 11; + }; + + required string uuid = 1; + required string name = 2; + required string surname = 3; + required Gender gender = 4; + required uint32 birthDate = 5; + optional bytes photo = 6; + optional string phoneNumber = 7; + optional bool isOnline = 8; + optional fixed32 visitTime = 9; + optional uint32 age = 10; + optional ZodiacSign zodiacSign = 11; + repeated string songs = 12; + repeated uint32 color = 13; + optional string hometown = 14 [default='Moscow']; + repeated float location = 15 [packed=true]; + optional double pi = 16; + optional double lotteryWin = 17; + optional float someRatio = 18; + optional float temperature = 19; + optional sint64 randomBigNumber = 20; + optional group MeasureUnits = 21 { + repeated float coef = 1; + repeated string unit = 2; + }; + optional group Nestiness = 22 + { + optional group A = 1 { + message SubB { + optional group C = 1 { + optional uint32 d = 1; + repeated uint32 e = 2; + }; + }; + optional SubB b = 100; + }; + }; + optional string newFieldStr = 23 [default='abc']; + 
optional int32 newFieldInt = 24 [default=-11]; + optional bool newBool = 25 [default=true]; +}; diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_table_default.proto similarity index 100% rename from tests/queries/0_stateless/00825_protobuf_format_table_default.proto rename to tests/queries/0_stateless/format_schemas/00825_protobuf_format_table_default.proto diff --git a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py index 3ed42f1c820..86c5048c8a3 100755 --- a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py +++ b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py @@ -12,18 +12,18 @@ import tempfile def read_varint(input): res = 0 - shift = 0 + multiplier = 1 while True: c = input.read(1) if len(c) == 0: return None b = c[0] if b < 0x80: - res += b << shift + res += b * multiplier break b -= 0x80 - res += b << shift - shift = shift << 7 + res += b * multiplier + multiplier *= 0x80 return res def write_varint(output, value): diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 39ec8bac3cf..45d569fc131 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -260,7 +260,9 @@ "00121_drop_column_zookeeper", "00116_storage_set", "00083_create_merge_tree_zookeeper", - "00062_replicated_merge_tree_alter_zookeeper" + "00062_replicated_merge_tree_alter_zookeeper", + "01720_constraints_complex_types", + "01747_alter_partition_key_enum_zookeeper" ], "polymorphic-parts": [ "01508_partition_pruning_long", /// bug, shoud be fixed @@ -281,13 +283,16 @@ "00746_sql_fuzzy", "00763_create_query_as_table_engine_bug", "00765_sql_compatibility_aliases", - "00825_protobuf_format_input", - "00825_protobuf_format_nested_optional", "00825_protobuf_format_array_3dim", - "00825_protobuf_format_map", "00825_protobuf_format_array_of_arrays", - "00825_protobuf_format_table_default", "00825_protobuf_format_enum_mapping", + "00825_protobuf_format_map", + "00825_protobuf_format_nested_in_nested", + "00825_protobuf_format_nested_optional", + "00825_protobuf_format_no_length_delimiter", + "00825_protobuf_format_persons", + "00825_protobuf_format_squares", + "00825_protobuf_format_table_default", "00826_cross_to_inner_join", "00834_not_between", "00909_kill_not_initialized_query", @@ -744,6 +749,7 @@ "01676_dictget_in_default_expression", "01700_system_zookeeper_path_in", "01715_background_checker_blather_zookeeper", + "01747_alter_partition_key_enum_zookeeper", "attach", "ddl_dictionaries", "dictionary", diff --git a/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml b/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml index f3df33358be..f27405b97a2 100644 --- a/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml +++ b/tests/testflows/aes_encryption/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/example/docker-compose/zookeeper-service.yml b/tests/testflows/example/docker-compose/zookeeper-service.yml index 6691a2df31c..ca732a48dbd 100644 --- a/tests/testflows/example/docker-compose/zookeeper-service.yml +++ b/tests/testflows/example/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + 
image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml index 6691a2df31c..ca732a48dbd 100644 --- a/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml +++ b/tests/testflows/ldap/authentication/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml index 6691a2df31c..ca732a48dbd 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml index 6691a2df31c..ca732a48dbd 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/tests/testflows/rbac/docker-compose/zookeeper-service.yml b/tests/testflows/rbac/docker-compose/zookeeper-service.yml index f3df33358be..f27405b97a2 100755 --- a/tests/testflows/rbac/docker-compose/zookeeper-service.yml +++ b/tests/testflows/rbac/docker-compose/zookeeper-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: zookeeper: - image: zookeeper:3.4.12 + image: zookeeper:3.6.2 expose: - "2181" environment: diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index a27a7e9dadc..8a39d591612 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -27,7 +27,6 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (zookeeper-adjust-block-numbers-to-parts) add_subdirectory (wikistat-loader) add_subdirectory (check-marks) - add_subdirectory (test-data-generator) add_subdirectory (convert-month-partitioned-parts) add_subdirectory (checksum-for-compressed-block) add_subdirectory (db-generator) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 4ba92864020..3e63f8898c0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,10 +1,13 @@ +v21.2.5.5-stable 2021-03-02 v21.2.4.6-stable 2021-02-20 v21.2.3.15-stable 2021-02-14 v21.2.2.8-stable 2021-02-07 +v21.1.6.13-stable 2021-03-02 v21.1.5.4-stable 2021-02-20 v21.1.4.46-stable 2021-02-14 v21.1.3.32-stable 2021-02-03 v21.1.2.15-stable 2021-01-18 +v20.12.8.5-stable 2021-03-02 v20.12.7.3-stable 2021-02-20 v20.12.6.29-stable 2021-02-14 v20.12.5.18-stable 2021-02-03 @@ -30,6 +33,7 @@ v20.9.5.5-stable 2020-11-13 v20.9.4.76-stable 2020-10-29 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 +v20.8.14.4-lts 2021-03-03 v20.8.13.15-lts 2021-02-20 v20.8.12.2-lts 2021-01-16 v20.8.11.17-lts 2020-12-25 diff --git a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt deleted file mode 100644 index 80d7d4301e7..00000000000 --- a/utils/test-data-generator/CMakeLists.txt +++ /dev/null 
@@ -1,18 +0,0 @@ -# Disable clang-tidy for protobuf generated files -set (CMAKE_CXX_CLANG_TIDY "") - -if (USE_PROTOBUF) - protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs ProtobufDelimitedMessagesSerializer_Hdrs ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format.proto) - protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs2 ProtobufDelimitedMessagesSerializer_Hdrs2 ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format_syntax2.proto) - add_executable (ProtobufDelimitedMessagesSerializer ProtobufDelimitedMessagesSerializer.cpp ${ProtobufDelimitedMessagesSerializer_Srcs} ${ProtobufDelimitedMessagesSerializer_Hdrs} ${ProtobufDelimitedMessagesSerializer_Srcs2} ${ProtobufDelimitedMessagesSerializer_Hdrs2}) - target_include_directories (ProtobufDelimitedMessagesSerializer SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - target_link_libraries (ProtobufDelimitedMessagesSerializer PRIVATE ${Protobuf_LIBRARY} boost::program_options) - get_filename_component(ProtobufDelimitedMessagesSerializer_OutputDir "${CMAKE_CURRENT_LIST_DIR}/../../tests/queries/0_stateless" REALPATH) - target_compile_definitions(ProtobufDelimitedMessagesSerializer PRIVATE OUTPUT_DIR="${ProtobufDelimitedMessagesSerializer_OutputDir}") - - # Protoc generates substandard code. - check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE) - if (HAS_SUGGEST_OVERRIDE) - target_compile_options(ProtobufDelimitedMessagesSerializer PRIVATE -Wno-suggest-destructor-override) - endif() -endif () diff --git a/utils/test-data-generator/ProtobufDelimitedMessagesSerializer.cpp b/utils/test-data-generator/ProtobufDelimitedMessagesSerializer.cpp deleted file mode 100644 index ad465913313..00000000000 --- a/utils/test-data-generator/ProtobufDelimitedMessagesSerializer.cpp +++ /dev/null @@ -1,728 +0,0 @@ -// Generator of protobuf delimited messages used in the protobuf IO tests -// tests/queries/0_stateless/00825_protobuf_format* - -#include -#include -#include -#include -#include "00825_protobuf_format.pb.h" -#include "00825_protobuf_format_syntax2.pb.h" - - -void writeInsertDataQueryForInputTest(std::stringstream & delimited_messages, const std::string & table_name, const std::string & format_schema, std::ostream & out) // STYLE_CHECK_ALLOW_STD_STRING_STREAM -{ - out << "echo -ne '"; - std::string bytes = delimited_messages.str(); - delimited_messages.str(""); - for (const char c : bytes) - { - char buf[5]; - sprintf(buf, "\\x%02x", static_cast(c)); - out << buf; - } - out << "' | $CLICKHOUSE_CLIENT --query=\"INSERT INTO " << table_name << " FORMAT Protobuf" - " SETTINGS format_schema = '$CURDIR/" - << format_schema << "'\"" << std::endl; -} - -void writeInsertDataQueriesForInputTest(std::ostream & out) -{ - std::stringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - { - Person person; - person.set_uuid("a7522158-3d41-4b77-ad69-6c598ee55c49"); - person.set_name("Ivan"); - person.set_surname("Petrov"); - person.set_gender(Gender::male); - person.set_birthdate(4015); // 1980-12-29 - person.set_photo("png"); - person.set_phonenumber("+74951234567"); - person.set_isonline(true); - person.set_visittime(1546703100); // 2019-01-05 18:45:00 - person.set_age(38); - person.set_zodiacsign(ZodiacSign::capricorn); - person.add_songs("Yesterday"); - person.add_songs("Flowers"); - person.add_color(255); - person.add_color(0); - person.add_color(0); - person.set_hometown("Moscow"); - person.add_location(55.753215); - 
person.add_location(37.622504); - person.set_pi(3.14); - person.set_lotterywin(214.10); - person.set_someratio(0.1); - person.set_temperature(5.8); - person.set_randombignumber(17060000000); - auto* mu = person.add_measureunits(); - mu->set_unit("meter"); - mu->set_coef(1); - mu = person.add_measureunits(); - mu->set_unit("centimeter"); - mu->set_coef(0.01); - mu = person.add_measureunits(); - mu->set_unit("kilometer"); - mu->set_coef(1000); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(500); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(501); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(502); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - { - Person person; - person.set_uuid("c694ad8a-f714-4ea3-907d-fd54fb25d9b5"); - person.set_name("Natalia"); - person.set_surname("Sokolova"); - person.set_gender(Gender::female); - person.set_birthdate(8102); // 1992-03-08 - person.set_photo("jpg"); - person.set_isonline(false); - person.set_age(26); - person.set_zodiacsign(ZodiacSign::pisces); - person.add_color(100); - person.add_color(200); - person.add_color(50); - person.set_hometown("Plymouth"); - person.add_location(50.403724); - person.add_location(-4.142123); - person.set_pi(3.14159); - person.set_someratio(0.007); - person.set_temperature(5.4); - person.set_randombignumber(-20000000000000); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - { - Person person; - person.set_uuid("a7da1aa6-f425-4789-8947-b034786ed374"); - person.set_name("Vasily"); - person.set_surname("Sidorov"); - person.set_gender(Gender::male); - person.set_birthdate(9339); // 1995-07-28 - person.set_photo("bmp"); - person.set_phonenumber("+442012345678"); - person.set_isonline(true); - person.set_visittime(1546117200); // 2018-12-30 00:00:00 - person.set_age(23); - person.set_zodiacsign(ZodiacSign::leo); - person.add_songs("Sunny"); - person.add_color(250); - person.add_color(244); - person.add_color(10); - person.set_hometown("Murmansk"); - person.add_location(68.970682); - person.add_location(33.074981); - person.set_pi(3.14159265358979); - person.set_lotterywin(100000000000); - person.set_someratio(800); - person.set_temperature(-3.2); - person.set_randombignumber(154400000); - auto* mu = person.add_measureunits(); - mu->set_unit("pound"); - mu->set_coef(16); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(503); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - writeInsertDataQueryForInputTest(ss, "in_persons_00825", "00825_protobuf_format:Person", out); - - { - AltPerson person; - person.add_location(42); - person.add_location(-88); - person.set_pi(3.141); - person.set_uuid("20fcd95a-332d-41db-a9ec-161f644d059c"); - person.set_name("Frida"); - person.set_gender(AltPerson::female); - person.set_zodiacsign(1122); // sagittarius - person.set_birthdate(3267); // 1978-12-12 - person.set_age("40"); - person.set_isonline(OnlineStatus::offline); - person.set_someratio(0.5); - person.set_visittime(1363005000); // 2013-03-11 16:30:00 - person.set_randombignumber(8010000009); - person.add_color(110); - person.add_color(210); - person.add_color(74); - person.set_lotterywin(311); - person.set_surname("Ermakova"); - person.set_phonenumber(3124555929); - person.set_temperature(10); - person.add_measureunits_unit("KB"); - person.add_measureunits_coef(1024); - person.add_measureunits_unit("MB"); - person.add_measureunits_coef(1048576); - 
person.set_nestiness_a_b_c_d(700); - person.add_nestiness_a_b_c_e(701); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - { - AltPerson person; - person.add_location(26); - person.add_location(-80); - person.set_pi(3.1416); - person.set_uuid("7cfa6856-a54a-4786-b8e5-745159d52278"); - person.set_name("Isolde"); - person.set_gender(AltPerson::female); - person.set_zodiacsign(120); // aquarius - person.set_birthdate(6248); // 1987-02-09 - person.set_age("32"); - person.set_isonline(OnlineStatus::online); - person.set_someratio(4.5); - person.set_randombignumber(-11111111111111); - person.add_color(255); - person.add_color(0); - person.add_color(255); - person.set_surname("Lavrova"); - person.set_temperature(25); - person.set_newfieldstr("abc"); - person.set_newfieldbool(true); - person.add_newfieldint(44); - person.add_measureunits_unit("Byte"); - person.add_measureunits_coef(8); - person.add_measureunits_unit("Bit"); - person.add_measureunits_coef(1); - person.mutable_newmessage()->set_z(91); - person.set_nestiness_a_b_c_d(702); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - writeInsertDataQueryForInputTest(ss, "in_persons_00825", "00825_protobuf_format:AltPerson", out); - - { - StrPerson person; - person.set_uuid("aa0e5a06-cab2-4034-a6a2-48e82b91664e"); - person.set_name("Leonid"); - person.set_surname("Kirillov"); - person.set_gender("male"); - person.set_birthdate("1983-06-24"); - person.set_phonenumber("+74950275864"); - person.set_isonline("1"); - person.set_visittime("2019-02-04 09:45:00"); - person.set_age("35"); - person.set_zodiacsign("cancer"); - person.add_songs("7 rings"); - person.add_songs("Eastside"); - person.add_songs("Last Hurrah"); - person.add_color("0"); - person.add_color("0"); - person.add_color("255"); - person.set_hometown("San Diego"); - person.add_location("32.823943"); - person.add_location("-117.081327"); - person.set_pi("3.1415927"); - person.set_lotterywin("15000000"); - person.set_someratio("186.75"); - person.set_temperature("-2.1"); - person.set_randombignumber("20659829331"); - person.mutable_measureunits()->add_unit("minute"); - person.mutable_measureunits()->add_coef("60"); - person.mutable_measureunits()->add_unit("hour"); - person.mutable_measureunits()->add_coef("3600"); - person.mutable_nestiness_a()->mutable_b_c()->add_e("1800"); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - writeInsertDataQueryForInputTest(ss, "in_persons_00825", "00825_protobuf_format:StrPerson", out); - - { - Syntax2Person person; - person.set_uuid("3faee064-c4f7-4d34-b6f3-8d81c2b6a15d"); - person.set_name("Nick"); - person.set_surname("Kolesnikov"); - person.set_gender(Syntax2Person::male); - person.set_birthdate(10586); // 1998-12-26 - person.set_photo("bmp"); - person.set_phonenumber("412-687-5007"); - person.set_isonline(true); - person.set_visittime(1542596399); // 2018-11-19 05:59:59 - person.set_age(20); - person.set_zodiacsign(Syntax2Person::capricorn); - person.add_songs("Havana"); - person.add_color(128); - person.add_color(0); - person.add_color(128); - person.set_hometown("Pittsburgh"); - person.add_location(40.517193); - person.add_location(-79.949452); - person.set_pi(3.1415926535898); - person.set_lotterywin(50000000000); - person.set_someratio(780); - person.set_temperature(18.3); - person.set_randombignumber(195500007); - person.mutable_measureunits()->add_unit("ounce"); - person.mutable_measureunits()->add_coef(28.35); - person.mutable_measureunits()->add_unit("carat"); - 
person.mutable_measureunits()->add_coef(0.2); - person.mutable_measureunits()->add_unit("gram"); - person.mutable_measureunits()->add_coef(1); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(9494); - google::protobuf::util::SerializeDelimitedToOstream(person, &ss); - } - - writeInsertDataQueryForInputTest(ss, "in_persons_00825", "00825_protobuf_format_syntax2:Syntax2Person", out); - - { - NumberAndSquare ns; - ns.set_number(2); - ns.set_square(4); - google::protobuf::util::SerializeDelimitedToOstream(ns, &ss); - } - - { - NumberAndSquare ns; - ns.set_number(0); - ns.set_square(0); - google::protobuf::util::SerializeDelimitedToOstream(ns, &ss); - } - - { - NumberAndSquare ns; - ns.set_number(3); - ns.set_square(9); - google::protobuf::util::SerializeDelimitedToOstream(ns, &ss); - } - - writeInsertDataQueryForInputTest(ss, "in_squares_00825", "00825_protobuf_format:NumberAndSquare", out); -} - - -void writeReferenceForOutputTest(std::ostream & out) -{ - { - Person person; - person.set_uuid("a7522158-3d41-4b77-ad69-6c598ee55c49"); - person.set_name("Ivan"); - person.set_surname("Petrov"); - person.set_gender(Gender::male); - person.set_birthdate(4015); // 1980-12-29 - person.set_photo("png"); - person.set_phonenumber(std::string("+74951234567\0", 13)); // Converted from FixedString(13) - person.set_isonline(true); - person.set_visittime(1546703100); // 2019-01-05 18:45:00 - person.set_age(38); - person.set_zodiacsign(ZodiacSign::capricorn); - person.add_songs("Yesterday"); - person.add_songs("Flowers"); - person.add_color(255); - person.add_color(0); - person.add_color(0); - person.set_hometown("Moscow"); - person.add_location(55.753215); - person.add_location(37.622504); - person.set_pi(3.14); - person.set_lotterywin(214.10); - person.set_someratio(0.1); - person.set_temperature(5.8); - person.set_randombignumber(17060000000); - auto* mu = person.add_measureunits(); - mu->set_unit("meter"); - mu->set_coef(1); - mu = person.add_measureunits(); - mu->set_unit("centimeter"); - mu->set_coef(0.01); - mu = person.add_measureunits(); - mu->set_unit("kilometer"); - mu->set_coef(1000); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(500); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(501); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(502); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - Person person; - person.set_uuid("c694ad8a-f714-4ea3-907d-fd54fb25d9b5"); - person.set_name("Natalia"); - person.set_surname("Sokolova"); - person.set_gender(Gender::female); - person.set_birthdate(8102); // 1992-03-08 - person.set_photo("jpg"); - person.set_isonline(false); - person.set_age(26); - person.set_zodiacsign(ZodiacSign::pisces); - person.add_color(100); - person.add_color(200); - person.add_color(50); - person.set_hometown("Plymouth"); - person.add_location(50.403724); - person.add_location(-4.142123); - person.set_pi(3.14159); - person.set_someratio(0.007); - person.set_temperature(5.4); - person.set_randombignumber(-20000000000000); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - Person person; - person.set_uuid("a7da1aa6-f425-4789-8947-b034786ed374"); - person.set_name("Vasily"); - person.set_surname("Sidorov"); - person.set_gender(Gender::male); - person.set_birthdate(9339); // 1995-07-28 - person.set_photo("bmp"); - person.set_phonenumber("+442012345678"); - person.set_isonline(true); - person.set_visittime(1546117200); // 
2018-12-30 00:00:00 - person.set_age(23); - person.set_zodiacsign(ZodiacSign::leo); - person.add_songs("Sunny"); - person.add_color(250); - person.add_color(244); - person.add_color(10); - person.set_hometown("Murmansk"); - person.add_location(68.970682); - person.add_location(33.074981); - person.set_pi(3.14159265358979); - person.set_lotterywin(100000000000); - person.set_someratio(800); - person.set_temperature(-3.2); - person.set_randombignumber(154400000); - auto* mu = person.add_measureunits(); - mu->set_unit("pound"); - mu->set_coef(16); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(503); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - out << "ALTERNATIVE->" << std::endl; - - { - AltPerson person; - person.add_location(55); - person.add_location(37); - person.set_pi(3.14); - person.set_uuid("a7522158-3d41-4b77-ad69-6c598ee55c49"); - person.set_name("Ivan"); - person.set_gender(AltPerson::male); - person.set_zodiacsign(1222); // capricorn - person.set_birthdate(4015); // 1980-12-29 - person.set_age("38"); - person.set_isonline(OnlineStatus::online); - person.set_someratio(0.100000001490116119384765625); // 0.1 converted from float to double - person.set_visittime(1546703100); // 2019-01-05 18:45:00 - person.set_randombignumber(17060000000); - person.add_color(255); - person.add_color(0); - person.add_color(0); - person.set_lotterywin(214); - person.set_surname("Petrov"); - person.set_phonenumber(+74951234567); - person.set_temperature(5); - person.add_measureunits_unit("meter"); - person.add_measureunits_coef(1); - person.add_measureunits_unit("centimeter"); - person.add_measureunits_coef(0.01); - person.add_measureunits_unit("kilometer"); - person.add_measureunits_coef(1000); - person.set_nestiness_a_b_c_d(500); - person.add_nestiness_a_b_c_e(501); - person.add_nestiness_a_b_c_e(502); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - AltPerson person; - person.add_location(50); - person.add_location(-4); - person.set_pi(3.14159); - person.set_uuid("c694ad8a-f714-4ea3-907d-fd54fb25d9b5"); - person.set_name("Natalia"); - person.set_gender(AltPerson::female); - person.set_zodiacsign(219); // pisces - person.set_birthdate(8102); // 1992-03-08 - person.set_age("26"); - person.set_isonline(OnlineStatus::offline); - person.set_someratio(0.007000000216066837310791015625); // 0.007 converted from float to double - person.set_randombignumber(-20000000000000); - person.add_color(100); - person.add_color(200); - person.add_color(50); - person.set_surname("Sokolova"); - person.set_temperature(5); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - AltPerson person; - person.add_location(68); - person.add_location(33); - person.set_pi(3.1415926535897); - person.set_uuid("a7da1aa6-f425-4789-8947-b034786ed374"); - person.set_name("Vasily"); - person.set_gender(AltPerson::male); - person.set_zodiacsign(723); // leo - person.set_birthdate(9339); // 1995-07-28 - person.set_age("23"); - person.set_isonline(OnlineStatus::online); - person.set_someratio(800); - person.set_visittime(1546117200); // 2018-12-30 00:00:00 - person.set_randombignumber(154400000); - person.add_color(250); - person.add_color(244); - person.add_color(10); - person.set_lotterywin(100000000000); - person.set_surname("Sidorov"); - person.set_phonenumber(+442012345678); - person.set_temperature(-3); - person.add_measureunits_unit("pound"); - person.add_measureunits_coef(16); - person.set_nestiness_a_b_c_d(503); - 
google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - out << "STRINGS->" << std::endl; - - { - StrPerson person; - person.set_uuid("a7522158-3d41-4b77-ad69-6c598ee55c49"); - person.set_name("Ivan"); - person.set_surname("Petrov"); - person.set_gender("male"); - person.set_birthdate("1980-12-29"); - person.set_phonenumber(std::string("+74951234567\0", 13)); // Converted from FixedString(13) - person.set_isonline("1"); - person.set_visittime("2019-01-05 18:45:00"); - person.set_age("38"); - person.set_zodiacsign("capricorn"); - person.add_songs("Yesterday"); - person.add_songs("Flowers"); - person.add_color("255"); - person.add_color("0"); - person.add_color("0"); - person.set_hometown("Moscow"); - person.add_location("55.753215"); - person.add_location("37.622504"); - person.set_pi("3.14"); - person.set_lotterywin("214.10"); - person.set_someratio("0.1"); - person.set_temperature("5.8"); - person.set_randombignumber("17060000000"); - person.mutable_measureunits()->add_unit("meter"); - person.mutable_measureunits()->add_coef("1"); - person.mutable_measureunits()->add_unit("centimeter"); - person.mutable_measureunits()->add_coef("0.01"); - person.mutable_measureunits()->add_unit("kilometer"); - person.mutable_measureunits()->add_coef("1000"); - person.mutable_nestiness_a()->mutable_b_c()->set_d("500"); - person.mutable_nestiness_a()->mutable_b_c()->add_e("501"); - person.mutable_nestiness_a()->mutable_b_c()->add_e("502"); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - StrPerson person; - person.set_uuid("c694ad8a-f714-4ea3-907d-fd54fb25d9b5"); - person.set_name("Natalia"); - person.set_surname("Sokolova"); - person.set_gender("female"); - person.set_birthdate("1992-03-08"); - person.set_isonline("0"); - person.set_age("26"); - person.set_zodiacsign("pisces"); - person.add_color("100"); - person.add_color("200"); - person.add_color("50"); - person.set_hometown("Plymouth"); - person.add_location("50.403724"); - person.add_location("-4.142123"); - person.set_pi("3.14159"); - person.set_someratio("0.007"); - person.set_temperature("5.4"); - person.set_randombignumber("-20000000000000"); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - StrPerson person; - person.set_uuid("a7da1aa6-f425-4789-8947-b034786ed374"); - person.set_name("Vasily"); - person.set_surname("Sidorov"); - person.set_gender("male"); - person.set_birthdate("1995-07-28"); - person.set_phonenumber("+442012345678"); - person.set_isonline("1"); - person.set_visittime("2018-12-30 00:00:00"); - person.set_age("23"); - person.set_zodiacsign("leo"); - person.add_songs("Sunny"); - person.add_color("250"); - person.add_color("244"); - person.add_color("10"); - person.set_hometown("Murmansk"); - person.add_location("68.970682"); - person.add_location("33.074981"); - person.set_pi("3.14159265358979"); - person.set_lotterywin("100000000000.00"); - person.set_someratio("800"); - person.set_temperature("-3.2"); - person.set_randombignumber("154400000"); - person.mutable_measureunits()->add_unit("pound"); - person.mutable_measureunits()->add_coef("16"); - person.mutable_nestiness_a()->mutable_b_c()->set_d("503"); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - out << "SYNTAX2->" << std::endl; - - { - Syntax2Person person; - person.set_uuid("a7522158-3d41-4b77-ad69-6c598ee55c49"); - person.set_name("Ivan"); - person.set_surname("Petrov"); - person.set_gender(Syntax2Person::male); - person.set_birthdate(4015); // 1980-12-29 - 
person.set_photo("png"); - person.set_phonenumber(std::string("+74951234567\0", 13)); // Converted from FixedString(13) - person.set_isonline(true); - person.set_visittime(1546703100); // 2019-01-05 18:45:00 - person.set_age(38); - person.set_zodiacsign(Syntax2Person::capricorn); - person.add_songs("Yesterday"); - person.add_songs("Flowers"); - person.add_color(255); - person.add_color(0); - person.add_color(0); - person.set_hometown("Moscow"); - person.add_location(55.753215); - person.add_location(37.622504); - person.set_pi(3.14); - person.set_lotterywin(214.10); - person.set_someratio(0.1); - person.set_temperature(5.8); - person.set_randombignumber(17060000000); - person.mutable_measureunits()->add_unit("meter"); - person.mutable_measureunits()->add_coef(1); - person.mutable_measureunits()->add_unit("centimeter"); - person.mutable_measureunits()->add_coef(0.01); - person.mutable_measureunits()->add_unit("kilometer"); - person.mutable_measureunits()->add_coef(1000); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(500); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(501); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->add_e(502); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - Syntax2Person person; - person.set_uuid("c694ad8a-f714-4ea3-907d-fd54fb25d9b5"); - person.set_name("Natalia"); - person.set_surname("Sokolova"); - person.set_gender(Syntax2Person::female); - person.set_birthdate(8102); // 1992-03-08 - person.set_photo("jpg"); - person.set_isonline(false); - person.set_age(26); - person.set_zodiacsign(Syntax2Person::pisces); - person.add_color(100); - person.add_color(200); - person.add_color(50); - person.set_hometown("Plymouth"); - person.add_location(50.403724); - person.add_location(-4.142123); - person.set_pi(3.14159); - person.set_someratio(0.007); - person.set_temperature(5.4); - person.set_randombignumber(-20000000000000); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - { - Syntax2Person person; - person.set_uuid("a7da1aa6-f425-4789-8947-b034786ed374"); - person.set_name("Vasily"); - person.set_surname("Sidorov"); - person.set_gender(Syntax2Person::male); - person.set_birthdate(9339); // 1995-07-28 - person.set_photo("bmp"); - person.set_phonenumber("+442012345678"); - person.set_isonline(true); - person.set_visittime(1546117200); // 2018-12-30 00:00:00 - person.set_age(23); - person.set_zodiacsign(Syntax2Person::leo); - person.add_songs("Sunny"); - person.add_color(250); - person.add_color(244); - person.add_color(10); - person.set_hometown("Murmansk"); - person.add_location(68.970682); - person.add_location(33.074981); - person.set_pi(3.14159265358979); - person.set_lotterywin(100000000000); - person.set_someratio(800); - person.set_temperature(-3.2); - person.set_randombignumber(154400000); - person.mutable_measureunits()->add_unit("pound"); - person.mutable_measureunits()->add_coef(16); - person.mutable_nestiness()->mutable_a()->mutable_b()->mutable_c()->set_d(503); - google::protobuf::util::SerializeDelimitedToOstream(person, &out); - } - - out << "SQUARES->" << std::endl; - - { - NumberAndSquare ns; - ns.set_number(0); - ns.set_square(0); - google::protobuf::util::SerializeDelimitedToOstream(ns, &out); - } - - { - NumberAndSquare ns; - ns.set_number(2); - ns.set_square(4); - google::protobuf::util::SerializeDelimitedToOstream(ns, &out); - } - - { - NumberAndSquare ns; - ns.set_number(3); - ns.set_square(9); - 
google::protobuf::util::SerializeDelimitedToOstream(ns, &out); - } -} - - -void parseCommandLine(int argc, char ** argv, std::string & output_dir) -{ - namespace po = boost::program_options; - po::options_description desc; - output_dir = OUTPUT_DIR; - desc.add_options() - ("help,h", "Show help") - ("directory,d", po::value(&output_dir), - "Set the output directory. By default it's " OUTPUT_DIR); - po::parsed_options parsed = po::command_line_parser(argc, argv).options(desc).run(); - po::variables_map vm; - po::store(parsed, vm); - po::notify(vm); - if (!output_dir.empty()) - return; - - // Show help. - std::cout << "This utility generates delimited messages for tests checking protobuf IO support." << std::endl; - std::cout << desc; - std::cout << "Example:" << std::endl; - std::cout << argv[0] << " -g OUTPUT_REFERENCE" << std::endl; - std::exit(0); -} - -void writeFile(const std::string & filepath, void (*fn)(std::ostream &)) -{ - std::cout << "Writing '" << filepath << "' ... "; - std::fstream out(filepath, std::fstream::out | std::fstream::trunc); - fn(out); - std::cout << "done." << std::endl; -} - -int main(int argc, char ** argv) -{ - std::string output_dir; - parseCommandLine(argc, argv, output_dir); - writeFile(output_dir + "/00825_protobuf_format_input.insh", writeInsertDataQueriesForInputTest); - writeFile(output_dir + "/00825_protobuf_format_output.reference", writeReferenceForOutputTest); - return 0; -} diff --git a/website/css/highlight.css b/website/css/highlight.css index 55a0054b07f..7cc8a4865dd 100644 --- a/website/css/highlight.css +++ b/website/css/highlight.css @@ -11,7 +11,7 @@ .syntax .hll { background-color: #b9b6b0 } .syntax { background: #f8f9fa; color: #2f1e2e } .syntax .c { color: #8d8687 } /* Comment */ -.syntax .err { color: #ef6155 } /* Error */ +.syntax .err {} /* Error */ .syntax .k { color: #000000; font-weight: bold } /* Keyword */ .syntax .l { color: #0088ff } /* Literal */ .syntax .n { color: #2f1e2e } /* Name */
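
Note on the removed generator above: it produced test fixtures for the 00825 protobuf IO tests by writing length-delimited messages, i.e. each message is prefixed with its varint-encoded size, using protobuf's delimited_message_util helpers. Protobuf messages are not self-delimiting on a byte stream, so this framing is what lets several messages be concatenated into one file and read back one by one. The following is only a minimal sketch of that framing, not the removed utility itself; it assumes a hypothetical generated message class Example with a single string field name (and a matching generated header example.pb.h), which is not the schema used by the 00825 tests.

// Sketch: write and read back length-delimited protobuf messages.
// Assumes a hypothetical message `Example { string name = 1; }`
// compiled with protoc into example.pb.h / example.pb.cc.
#include <fstream>
#include <iostream>

#include <google/protobuf/util/delimited_message_util.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>

#include "example.pb.h"  // hypothetical generated header

int main()
{
    {
        // Write two messages back to back; each one is prefixed with its varint length.
        std::ofstream out("messages.bin", std::ios::binary);
        Example msg;
        msg.set_name("first");
        google::protobuf::util::SerializeDelimitedToOstream(msg, &out);
        msg.set_name("second");
        google::protobuf::util::SerializeDelimitedToOstream(msg, &out);
    }

    // Read the stream back message by message until a clean end of file.
    std::ifstream in("messages.bin", std::ios::binary);
    google::protobuf::io::IstreamInputStream zero_copy_in(&in);
    bool clean_eof = false;
    Example msg;
    while (google::protobuf::util::ParseDelimitedFromZeroCopyStream(&msg, &zero_copy_in, &clean_eof))
        std::cout << msg.name() << '\n';

    return clean_eof ? 0 : 1;
}

The removed generator followed the same pattern, but serialized many hand-filled messages of the test schemas into the .insh and .reference files consumed by the 00825 test scripts; those fixtures are now produced elsewhere, which is why the utility and its CMake target are deleted in this change.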