From 9bcc5a6175eceadd48de4ed6f5c6360d87730b4d Mon Sep 17 00:00:00 2001
From: John Skopis
Date: Wed, 26 Aug 2020 08:36:58 +0000
Subject: [PATCH 001/152] Support interserver credential rotation

Restarting a server instance to change the interserver password results
in many replicas being out of sync until all clusters are using the new
credential.

This commit adds dynamic credential loading for both the client
(Replicated* tables) and server (InterserverIOHTTPHandler). This commit
also adds the ability to rotate credentials, i.e. accept more than one
credential during a credential change.

state0 (no auth):

state1 (auth+allow_empty migration):

    <interserver_http_credentials>
        <user>admin</user>
        <password>222</password>
        <allow_empty>true</allow_empty>
    </interserver_http_credentials>

state2 (auth+new admin password migration):

    <interserver_http_credentials>
        <user>admin</user>
        <password>333</password>
        <users>
            <admin>222</admin>
        </users>
    </interserver_http_credentials>
---
 programs/server/Server.cpp                    | 14 +--
 src/Interpreters/Context.cpp                  | 52 +++++++---
 src/Interpreters/Context.h                    |  6 +-
 src/Interpreters/InterserverCredentials.cpp   | 61 ++++++++++++
 src/Interpreters/InterserverCredentials.h     | 94 +++++++++++++++++++
 src/Interpreters/ya.make                      |  1 +
 src/Server/InterserverIOHTTPHandler.cpp       | 47 +++++-----
 src/Server/InterserverIOHTTPHandler.h         |  6 +-
 .../test_replication_credentials/test.py      | 24 ++++-
 9 files changed, 254 insertions(+), 51 deletions(-)
 create mode 100644 src/Interpreters/InterserverCredentials.cpp
 create mode 100644 src/Interpreters/InterserverCredentials.h

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 76765c0374c..013aa2d994a 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -620,16 +621,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
         }
     }

-    if (config().has("interserver_http_credentials"))
-    {
-        String user = config().getString("interserver_http_credentials.user", "");
-        String password = config().getString("interserver_http_credentials.password", "");
-
-        if (user.empty())
-            throw Exception("Configuration parameter interserver_http_credentials user can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
-
-        global_context->setInterserverCredentials(user, password);
-    }
+    LOG_DEBUG(log, "Initializing InterserverCredentials.");
+    global_context->updateInterserverCredentials(config());

     if (config().has("macros"))
         global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
@@ -673,6 +666,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
             global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);

             global_context->updateStorageConfiguration(*config);
+            global_context->updateInterserverCredentials(*config);
         },
         /* already_loaded = */ true);

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 9fb74a4f800..5d24dc27e6f 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include <Interpreters/InterserverCredentials.h>
 #include
 #include
 #include
@@ -314,9 +315,9 @@ struct ContextShared

     String interserver_io_host;                     /// The host name by which this server is available for other servers.
     UInt16 interserver_io_port = 0;                 /// and port.
-    String interserver_io_user;
-    String interserver_io_password;
     String interserver_scheme;                      /// http or https
+    mutable std::mutex interserver_io_credentials_mutex;
+    std::shared_ptr<BaseInterserverCredentials> interserver_io_credentials;

     String path;                                    /// Path to the data directory, with a slash at the end.
     String flags_path;                              /// Path to the directory with some control flags for server maintenance.
@@ -1615,6 +1616,42 @@ bool Context::hasAuxiliaryZooKeeper(const String & name) const
     return getConfigRef().has("auxiliary_zookeepers." + name);
 }

+std::shared_ptr<BaseInterserverCredentials> Context::getInterserverCredential()
+{
+    std::lock_guard lock(shared->interserver_io_credentials_mutex);
+    return shared->interserver_io_credentials;
+}
+
+void Context::setInterserverCredentials(std::shared_ptr<BaseInterserverCredentials> credentials)
+{
+    std::lock_guard lock(shared->interserver_io_credentials_mutex);
+    shared->interserver_io_credentials = credentials;
+}
+
+void Context::updateInterserverCredentials(const Poco::Util::AbstractConfiguration & config)
+{
+    std::shared_ptr<BaseInterserverCredentials> interserver_credentials = nullptr;
+
+    if (config.has("interserver_http_credentials"))
+    {
+        interserver_credentials = ConfigInterserverCredentials::make(config, "interserver_http_credentials");
+    }
+    else
+    {
+        interserver_credentials = NullInterserverCredentials::make();
+    }
+
+    global_context->setInterserverCredentials(interserver_credentials);
+}
+
+std::pair<String, String> Context::getInterserverCredentials() const
+{
+    std::lock_guard lock(shared->interserver_io_credentials_mutex);
+    auto & credentials = shared->interserver_io_credentials;
+
+    return { credentials->getUser(), credentials->getPassword() };
+}
+
 void Context::setInterserverIOAddress(const String & host, UInt16 port)
 {
     shared->interserver_io_host = host;
@@ -1630,17 +1667,6 @@ std::pair<String, UInt16> Context::getInterserverIOAddress() const
     return { shared->interserver_io_host, shared->interserver_io_port };
 }

-void Context::setInterserverCredentials(const String & user_, const String & password)
-{
-    shared->interserver_io_user = user_;
-    shared->interserver_io_password = password;
-}
-
-std::pair<String, String> Context::getInterserverCredentials() const
-{
-    return { shared->interserver_io_user, shared->interserver_io_password };
-}
-
 void Context::setInterserverScheme(const String & scheme)
 {
     shared->interserver_scheme = scheme;
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 79140f0d209..c9afe9a7f75 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -62,6 +62,7 @@ class AccessRightsElements;
 class EmbeddedDictionaries;
 class ExternalDictionariesLoader;
 class ExternalModelsLoader;
+class BaseInterserverCredentials;
 class InterserverIOHandler;
 class BackgroundSchedulePool;
 class MergeList;
@@ -440,7 +441,8 @@ public:
     std::pair<String, UInt16> getInterserverIOAddress() const;

     /// Credentials which server will use to communicate with others
-    void setInterserverCredentials(const String & user, const String & password);
+    void updateInterserverCredentials(const Poco::Util::AbstractConfiguration & config);
+    std::shared_ptr<BaseInterserverCredentials> getInterserverCredential();
     std::pair<String, String> getInterserverCredentials() const;

     /// Interserver requests scheme (http or https)
@@ -695,6 +697,8 @@ private:

     /// If the password is not set, the password will not be checked
     void setUserImpl(const String & name, const std::optional<String> & password, const Poco::Net::SocketAddress & address);
+
+    void setInterserverCredentials(std::shared_ptr<BaseInterserverCredentials> credentials);
 };


diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp
new file mode 100644
index 00000000000..b81eedad483
--- /dev/null
+++ b/src/Interpreters/InterserverCredentials.cpp
@@ -0,0 +1,61 @@
+#include <Interpreters/InterserverCredentials.h>
+#include <common/logger_useful.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NO_ELEMENTS_IN_CONFIG;
+    extern const int WRONG_PASSWORD;
+}
+
+std::shared_ptr<ConfigInterserverCredentials>
+ConfigInterserverCredentials::make(const Poco::Util::AbstractConfiguration & config, const std::string root_tag)
+{
+    const auto user = config.getString(root_tag + ".user", "");
+    const auto password = config.getString(root_tag + ".password", "");
+
+    if (user.empty())
+        throw Exception("Configuration parameter interserver_http_credentials user can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
+
+    auto store = makeCredentialStore(user, password, config, root_tag);
+
+    return std::make_shared<ConfigInterserverCredentials>(user, password, store);
+}
+
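+/// Builds the set of credentials this server accepts. For the "state2"
+/// config from the commit message (current password 333, old password 222
+/// kept under <users>), the store holds both {admin, 333} and {admin, 222},
+/// so replicas still sending the old password keep authenticating while the
+/// rotation is rolled out.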
""); + + if (user.empty()) + throw Exception("Configuration parameter interserver_http_credentials user can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + auto store = makeCredentialStore(user, password, config, root_tag); + + return std::make_shared(user, password, store); +} + +ConfigInterserverCredentials::Store ConfigInterserverCredentials::makeCredentialStore( + const std::string current_user_, + const std::string current_password_, + const Poco::Util::AbstractConfiguration & config, + const std::string root_tag) +{ + Store store; + store.insert({{current_user_, current_password_}, true}); + if (config.has(root_tag + ".allow_empty") && config.getBool(root_tag + ".allow_empty")) + { + /// Allow empty credential to support migrating from no auth + store.insert({{"", ""}, true}); + } + + + Poco::Util::AbstractConfiguration::Keys users; + config.keys(root_tag + ".users", users); + for (const auto & user : users) + { + LOG_DEBUG(&Poco::Logger::get("InterserverCredentials"), "Adding credential for {}", user); + const auto password = config.getString(root_tag + ".users." + user); + store.insert({{user, password}, true}); + } + + return store; +} + +bool ConfigInterserverCredentials::isValidUser(const std::pair credentials) +{ + const auto & valid = store.find(credentials); + if (valid == store.end()) + throw Exception("Incorrect user or password in HTTP basic authentication: " + credentials.first, ErrorCodes::WRONG_PASSWORD); + return true; +} + +} diff --git a/src/Interpreters/InterserverCredentials.h b/src/Interpreters/InterserverCredentials.h new file mode 100644 index 00000000000..27e4fc16bb5 --- /dev/null +++ b/src/Interpreters/InterserverCredentials.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +/// InterserverCredentials holds credentials for server (store) and client +/// credentials (current_*). The container is constructed through `make` and a +/// shared_ptr is captured inside Context. +class BaseInterserverCredentials +{ +public: + BaseInterserverCredentials(std::string current_user_, std::string current_password_) + : current_user(current_user_), current_password(current_password_) + { } + + virtual ~BaseInterserverCredentials() { } + + /// isValidUser returns true or throws WRONG_PASSWORD + virtual bool isValidUser(const std::pair credentials) = 0; + + std::string getUser() { return current_user; } + + std::string getPassword() { return current_password; } + + +protected: + std::string current_user; + std::string current_password; +}; + + +/// NullInterserverCredentials are used when authentication is not configured +class NullInterserverCredentials : public virtual BaseInterserverCredentials +{ +public: + NullInterserverCredentials(const NullInterserverCredentials &) = delete; + NullInterserverCredentials() : BaseInterserverCredentials("", "") { } + + ~NullInterserverCredentials() override { } + + static std::shared_ptr make() { return std::make_shared(); } + + bool isValidUser(const std::pair credentials) override + { + std::ignore = credentials; + return true; + } +}; + + +/// ConfigInterserverCredentials implements authentication using a Store, which +/// is configured, e.g. 
+class ConfigInterserverCredentials : public virtual BaseInterserverCredentials
+{
+public:
+    using Store = std::map<std::pair<std::string, std::string>, bool>;
+
+    ConfigInterserverCredentials(const ConfigInterserverCredentials &) = delete;
+
+    static std::shared_ptr<ConfigInterserverCredentials> make(const Poco::Util::AbstractConfiguration & config, const std::string root_tag);
+
+    ~ConfigInterserverCredentials() override { }
+
+    ConfigInterserverCredentials(const std::string current_user_, const std::string current_password_, const Store & store_)
+        : BaseInterserverCredentials(current_user_, current_password_), store(std::move(store_))
+    {
+    }
+
+    bool isValidUser(const std::pair<std::string, std::string> credentials) override;
+
+private:
+    Store store;
+
+    static Store makeCredentialStore(
+        const std::string current_user_,
+        const std::string current_password_,
+        const Poco::Util::AbstractConfiguration & config,
+        const std::string root_tag);
+};
+
+}
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index 92da029d681..1e0bc70b6de 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -99,6 +99,7 @@ SRCS(
    InterpreterSystemQuery.cpp
    InterpreterUseQuery.cpp
    InterpreterWatchQuery.cpp
+   InterserverCredentials.cpp
    JoinSwitcher.cpp
    JoinToSubqueryTransformVisitor.cpp
    JoinedTables.cpp
diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp
index 973759bedd1..dbb1b9ee0f3 100644
--- a/src/Server/InterserverIOHTTPHandler.cpp
+++ b/src/Server/InterserverIOHTTPHandler.cpp
@@ -20,35 +20,25 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int ABORTED;
+    extern const int NOT_IMPLEMENTED;
     extern const int TOO_MANY_SIMULTANEOUS_QUERIES;
+    extern const int WRONG_PASSWORD;
 }

-std::pair<String, bool> InterserverIOHTTPHandler::checkAuthentication(Poco::Net::HTTPServerRequest & request) const
+bool InterserverIOHTTPHandler::checkAuthentication(Poco::Net::HTTPServerRequest & request) const
 {
-    const auto & config = server.config();
+    auto creds = server.context().getInterserverCredential();
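+    /// A request without an Authorization header is treated as the empty
+    /// credential; it only validates against NullInterserverCredentials or a
+    /// store built with <allow_empty>true</allow_empty>.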
+    if (!request.hasCredentials())
+        return creds->isValidUser(std::make_pair(default_user, default_password));

-    if (config.has("interserver_http_credentials.user"))
-    {
-        if (!request.hasCredentials())
-            return {"Server requires HTTP Basic authentication, but client doesn't provide it", false};
-        String scheme, info;
-        request.getCredentials(scheme, info);
+    String scheme, info;
+    request.getCredentials(scheme, info);

-        if (scheme != "Basic")
-            return {"Server requires HTTP Basic authentication but client provides another method", false};
+    if (scheme != "Basic")
+        throw Exception("Server requires HTTP Basic authentication but client provides another method", ErrorCodes::NOT_IMPLEMENTED);

-        String user = config.getString("interserver_http_credentials.user");
-        String password = config.getString("interserver_http_credentials.password", "");
-
-        Poco::Net::HTTPBasicCredentials credentials(info);
-        if (std::make_pair(user, password) != std::make_pair(credentials.getUsername(), credentials.getPassword()))
-            return {"Incorrect user or password in HTTP Basic authentication", false};
-    }
-    else if (request.hasCredentials())
-    {
-        return {"Client requires HTTP Basic authentication, but server doesn't provide it", false};
-    }
-    return {"", true};
+    Poco::Net::HTTPBasicCredentials credentials(info);
+    return creds->isValidUser(std::make_pair(credentials.getUsername(), credentials.getPassword()));
 }

 void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output)
@@ -95,7 +85,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ

     try
     {
-        if (auto [message, success] = checkAuthentication(request); success)
+        if (checkAuthentication(request))
         {
             processQuery(request, response, used_output);
             LOG_DEBUG(log, "Done processing query");
@@ -104,12 +94,21 @@
         {
             response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
             if (!response.sent())
-                writeString(message, *used_output.out);
+                writeString("Unauthorized.", *used_output.out);
             LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
         }
     }
     catch (Exception & e)
     {
+        if (e.code() == ErrorCodes::WRONG_PASSWORD)
+        {
+            response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
+            if (!response.sent())
+                writeString("Unauthorized.", *used_output.out);
+            LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
+            return;
+        }
+
         if (e.code() == ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES)
             return;

diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h
index 8dc1962664c..8499fd59925 100644
--- a/src/Server/InterserverIOHTTPHandler.h
+++ b/src/Server/InterserverIOHTTPHandler.h
@@ -1,9 +1,11 @@
 #pragma once

 #include
+#include
 #include
 #include
 #include
+#include


 namespace CurrentMetrics
@@ -41,7 +43,9 @@ private:
     void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output);

-    std::pair<String, bool> checkAuthentication(Poco::Net::HTTPServerRequest & request) const;
+    bool checkAuthentication(Poco::Net::HTTPServerRequest & request) const;
+    const std::string default_user = "";
+    const std::string default_password = "";
 };

 }
diff --git a/tests/integration/test_replication_credentials/test.py b/tests/integration/test_replication_credentials/test.py
index 4f07d6966a6..82c1bcad7b0 100644
--- a/tests/integration/test_replication_credentials/test.py
+++ b/tests/integration/test_replication_credentials/test.py
@@ -9,7 +9,6 @@ def _fill_nodes(nodes, shard):
         node.query(
         '''
             CREATE DATABASE test;
-
             CREATE TABLE test_table(date Date, id UInt32, dummy UInt32)
             ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}', date, id, 8192);
         '''.format(shard=shard, replica=node.name))
@@ -135,6 +134,7 @@ def credentials_and_no_credentials_cluster():

 def test_credentials_and_no_credentials(credentials_and_no_credentials_cluster):
+    # Initial state: node7 requires auth; node8 open
     node7.query("insert into test_table values ('2017-06-21', 111, 0)")
     time.sleep(1)

@@ -144,5 +144,25 @@ def test_credentials_and_no_credentials(credentials_and_no_credentials_cluster):
     node8.query("insert into test_table values ('2017-06-22', 222, 1)")
     time.sleep(1)

-    assert node7.query("SELECT id FROM test_table order by id") == '111\n'
+    assert node7.query("SELECT id FROM test_table order by id") == '111\n222\n'
     assert node8.query("SELECT id FROM test_table order by id") == '222\n'
+
+    allow_empty = """
+<yandex>
+    <interserver_http_port>9009</interserver_http_port>
+    <interserver_http_credentials>
+        <user>admin</user>
+        <password>222</password>
+        <allow_empty>true</allow_empty>
+    </interserver_http_credentials>
+</yandex>
+"""
+
+    # change state: Flip node7 to mixed auth/non-auth (allow node8)
+    node7.replace_config("/etc/clickhouse-server/config.d/credentials1.xml",
+                         allow_empty)
+    node7.query("insert into test_table values ('2017-06-22', 333, 1)")
+    node8.query("DETACH TABLE test_table")
+    node8.query("ATTACH TABLE test_table")
+    time.sleep(3)
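+    # node7 now accepts the empty credential as well, so node8 (which is
+    # configured without interserver credentials) can fetch part 333 after
+    # the table is re-attached.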
+    assert node8.query("SELECT id FROM test_table order by id") == '111\n222\n333\n'

From be929edd88cb753f16b425fed776381340068fa5 Mon Sep 17 00:00:00 2001
From: romanzhukov
Date: Tue, 9 Mar 2021 18:10:37 +0300
Subject: [PATCH 002/152] DOCSUP-7045: Add interserver replication https

---
 .../settings.md                               |  3 ++-
 docs/en/operations/update.md                  |  3 ++-
 .../settings.md                               | 27 ++++++++++++++++++-
 docs/ru/operations/update.md                  | 19 +++++++++++--
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 89fcbafe663..e47ff638007 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -321,7 +321,8 @@ Similar to `interserver_http_host`, except that this hostname can be used by oth

 The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.
 By default, the authentication is not used.

-**Note:** These credentials are common for replication through `HTTP` and `HTTPS`.
+!!! note "Note"
+    These credentials are common for replication through `HTTP` and `HTTPS`.

 This section contains the following parameters:

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index 9fa9c44e130..d0542295190 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -15,7 +15,8 @@ $ sudo service clickhouse-server restart

 If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method.

-ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.
+!!! warning "Warning"
+    ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.

 The upgrade of older version of ClickHouse to specific version:

diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 15ab13836e3..9d2ae9c09dc 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -285,7 +285,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part

 ## interserver_http_host {#interserver-http-host}

-Имя хоста, которое могут использовать другие серверы для обращения к этому.
+Имя хоста, которое могут использовать другие серверы для обращения к этому хосту.

 Если не указано, то определяется аналогично команде `hostname -f`.

@@ -297,11 +297,36 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
 <interserver_http_host>example.yandex.ru</interserver_http_host>
 ```

+## interserver_https_port {#interserver-https-port}
+
+Порт для обмена данными между серверами ClickHouse по протоколу `HTTPS`.
+
+**Пример**
+
+``` xml
+<interserver_https_port>9010</interserver_https_port>
+```
+
+## interserver_https_host {#interserver-https-host}
+
+Имя хоста, которое могут использовать другие серверы для обращения к этому хосту по протоколу `HTTPS`.
+
+**Пример**
+
+``` xml
+<interserver_https_host>example.yandex.ru</interserver_https_host>
+```
+
+
+
 ## interserver_http_credentials {#server-settings-interserver-http-credentials}

 Имя пользователя и пароль, использующиеся для аутентификации при [репликации](../../operations/server-configuration-parameters/settings.md) движками Replicated\*. Это имя пользователя и пароль используются только для взаимодействия между репликами кластера и никак не связаны с аутентификацией клиентов ClickHouse. Сервер проверяет совпадение имени и пароля для соединяющихся с ним реплик, а также использует это же имя и пароль для соединения с другими репликами. Соответственно, эти имя и пароль должны быть прописаны одинаковыми для всех реплик кластера.
 По умолчанию аутентификация не используется.

+!!! note "Примечание"
+    Эти учетные данные являются общими для обмена данными по протоколам `HTTP` и `HTTPS`.
+
 Раздел содержит следующие параметры:

 - `user` — имя пользователя.
diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md
index c74b28b3fd7..ab2f2571c24 100644
--- a/docs/ru/operations/update.md
+++ b/docs/ru/operations/update.md
@@ -3,7 +3,7 @@ toc_priority: 47
 toc_title: "\u041e\u0431\u043d\u043e\u0432\u043b\u0435\u043d\u0438\u0435\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065"
 ---

-# Обновление ClickHouse {#obnovlenie-clickhouse}
+# Обновление ClickHouse {#clickhouse-upgrade}

 Если ClickHouse установлен с помощью deb-пакетов, выполните следующие команды на сервере:

@@ -15,4 +15,19 @@ $ sudo service clickhouse-server restart

 Если ClickHouse установлен не из рекомендуемых deb-пакетов, используйте соответствующий метод обновления.

-ClickHouse не поддерживает распределенное обновление. Операция должна выполняться последовательно на каждом отдельном сервере. Не обновляйте все серверы в кластере одновременно, иначе кластер становится недоступным в течение некоторого времени.
+!!! warning "Предупреждение"
+    ClickHouse не поддерживает распределенное обновление. Операция обновления должна выполняться последовательно на каждом отдельном сервере. Не обновляйте все серверы в кластере одновременно, иначе кластер станет недоступен в течение некоторого времени.
+
+Обновление ClickHouse до определенной версии:
+
+Пример:
+
+`xx.yy.a.b` — это номер текущей стабильной версии. 
Последнюю стабильную версию можно узнать [здесь](https://github.com/ClickHouse/ClickHouse/releases) + +```bash +$ sudo apt-get update +$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b +$ sudo service clickhouse-server restart +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/update/) \ No newline at end of file From cebeef8487d2dae13b3a06f14537776b51a13a43 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 10 Mar 2021 10:58:54 +0300 Subject: [PATCH 003/152] Update docs/ru/operations/update.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/operations/update.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md index ab2f2571c24..8a411b32214 100644 --- a/docs/ru/operations/update.md +++ b/docs/ru/operations/update.md @@ -20,7 +20,7 @@ $ sudo service clickhouse-server restart Обновление ClickHouse до определенной версии: -Пример: +**Пример** `xx.yy.a.b` — это номер текущей стабильной версии. Последнюю стабильную версию можно узнать [здесь](https://github.com/ClickHouse/ClickHouse/releases) @@ -30,4 +30,4 @@ $ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b c $ sudo service clickhouse-server restart ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/update/) \ No newline at end of file +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/update/) From b01897ebd73cb47476c94592d30d08037cff0f26 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Mar 2021 14:31:39 +0300 Subject: [PATCH 004/152] check how ccache is used for contribs --- docker/packager/binary/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index a42789c6186..71bad4a93cd 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -20,6 +20,12 @@ rm -f CMakeCache.txt # Read cmake arguments into array (possibly empty) read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + +# FIXME Check how ccache is used for contribs. +# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. +ninja $NINJA_FLAGS contrib/all +ccache --show-stats ||: + # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. ninja $NINJA_FLAGS clickhouse-bundle mv ./programs/clickhouse* /output From e5953f249d611751e6336e93dc109412492fd628 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Mar 2021 15:34:24 +0300 Subject: [PATCH 005/152] Update build.sh --- docker/packager/binary/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 71bad4a93cd..edd4e52ba16 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -21,9 +21,9 @@ rm -f CMakeCache.txt read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. -# FIXME Check how ccache is used for contribs. +# FIXME Check how ccache is used for contribs. 
The contrib/all target doesn't build successfully, but we don't care. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS contrib/all +ninja $NINJA_FLAGS contrib/all ||: ccache --show-stats ||: # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. From c143891be3f692cc9f4259916c67b9ba5b5924b0 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 30 Mar 2021 15:49:09 +0300 Subject: [PATCH 006/152] =?UTF-8?q?fix=20(=D0=BC=D0=B5=D0=B6=D0=B4=D1=83?= =?UTF-8?q?=20=D1=80=D0=B5=D0=BF=D0=BB=D0=B8=D0=BA=D0=B0=D0=BC=D0=B8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 9d2ae9c09dc..7762213690f 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -299,7 +299,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## interserver_https_port {#interserver-https-port} -Порт для обмена данными между серверами ClickHouse по протоколу `HTTPS`. +Порт для обмена данными между репликами ClickHouse по протоколу `HTTPS`. **Пример** From a28dd16846677a5a1ca36c2b799b308e18cb6504 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 30 Mar 2021 15:51:15 +0300 Subject: [PATCH 007/152] fix (update multiple servers) en --- docs/en/operations/update.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index d0542295190..97f2bb07d2a 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -15,8 +15,8 @@ $ sudo service clickhouse-server restart If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. -!!! warning "Warning" - ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. +!!! note "Note" + You can update multiple servers at once as soon as there is no moment of time when all replicas of one shards are offline. The upgrade of older version of ClickHouse to specific version: From 4c4de3b3f5538bd2f81e03c87583984c44ca9faf Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 30 Mar 2021 15:54:04 +0300 Subject: [PATCH 008/152] fix (update multiple servers) - ru --- docs/ru/operations/update.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md index 8a411b32214..2486d14cf08 100644 --- a/docs/ru/operations/update.md +++ b/docs/ru/operations/update.md @@ -15,8 +15,8 @@ $ sudo service clickhouse-server restart Если ClickHouse установлен не из рекомендуемых deb-пакетов, используйте соответствующий метод обновления. -!!! warning "Предупреждение" - ClickHouse не поддерживает распределенное обновление. Операция обновления должна выполняться последовательно на каждом отдельном сервере. Не обновляйте все серверы в кластере одновременно, иначе кластер станет недоступен в течение некоторого времени. +!!! 
note "Примечание" + Вы можете обновить сразу несколько серверов, кроме случая, когда все реплики одного шарда отключены. Обновление ClickHouse до определенной версии: From 45d5179098a81c461b843d3effac1f9a048418d9 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Tue, 30 Mar 2021 15:58:27 +0300 Subject: [PATCH 009/152] =?UTF-8?q?fix=20(=D0=A0=D0=B5=D0=BF=D0=BB=D0=B8?= =?UTF-8?q?=D0=BA=D0=B8).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 7762213690f..9f0508cb821 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -309,7 +309,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## interserver_https_host {#interserver-https-host} -Имя хоста, которое могут использовать другие серверы для обращения к этому хосту по протоколу `HTTPS`. +Имя реплики, которое могут использовать другие реплики для обращения к ней по протоколу `HTTPS`. **Пример** From 5f8e88aca0d5b04549853da55953e42bbe753e75 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Mar 2021 16:39:49 +0300 Subject: [PATCH 010/152] boop --- docker/packager/binary/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index edd4e52ba16..89727bff09e 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -21,6 +21,7 @@ rm -f CMakeCache.txt read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + # FIXME Check how ccache is used for contribs. The contrib/all target doesn't build successfully, but we don't care. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. 
ninja $NINJA_FLAGS contrib/all ||: From 350546356ac40cbc7018ae89d51e30733a5afab4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Mar 2021 18:02:44 +0300 Subject: [PATCH 011/152] debug dns in docker build --- docker/packager/binary/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 91036d88d8c..3334d1291b4 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -15,7 +15,9 @@ RUN apt-get update \ wget \ --yes --no-install-recommends --verbose-versions \ && cat /etc/resolv.conf \ + && cat /etc/hosts \ && echo "nameserver 1.1.1.1" >> /etc/resolv.conf \ + && dig apt.llvm.org \ && nslookup -debug apt.llvm.org \ && ping -c1 apt.llvm.org \ && wget -nv --retry-connrefused --tries=10 -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \ @@ -37,7 +39,9 @@ RUN apt-get update \ --yes --no-install-recommends RUN cat /etc/resolv.conf \ + && cat /etc/hosts \ && echo "nameserver 1.1.1.1" >> /etc/resolv.conf \ + && dig apt.llvm.org \ && nslookup -debug apt.llvm.org \ && apt-get update \ && apt-get install \ From 8d52292eb1f07c0aed9fd12bb61d46ab03fcc4ad Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Mar 2021 18:57:22 +0300 Subject: [PATCH 012/152] fix --- docker/packager/binary/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 3334d1291b4..c309599adba 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -17,8 +17,8 @@ RUN apt-get update \ && cat /etc/resolv.conf \ && cat /etc/hosts \ && echo "nameserver 1.1.1.1" >> /etc/resolv.conf \ - && dig apt.llvm.org \ - && nslookup -debug apt.llvm.org \ + && { dig apt.llvm.org || : ; }\ + && { nslookup -debug apt.llvm.org || : ; } \ && ping -c1 apt.llvm.org \ && wget -nv --retry-connrefused --tries=10 -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \ && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \ @@ -41,8 +41,8 @@ RUN apt-get update \ RUN cat /etc/resolv.conf \ && cat /etc/hosts \ && echo "nameserver 1.1.1.1" >> /etc/resolv.conf \ - && dig apt.llvm.org \ - && nslookup -debug apt.llvm.org \ + && { dig apt.llvm.org || : ; } \ + && { nslookup -debug apt.llvm.org || : ; } \ && apt-get update \ && apt-get install \ bash \ From 1ccc162c8669abbf9d82cc86370e25025b9553d6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 31 Mar 2021 15:59:45 +0300 Subject: [PATCH 013/152] stats --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 89727bff09e..12e6a4f38de 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -21,11 +21,11 @@ rm -f CMakeCache.txt read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. - # FIXME Check how ccache is used for contribs. The contrib/all target doesn't build successfully, but we don't care. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. 
ninja $NINJA_FLAGS contrib/all ||:
 ccache --show-stats ||:
+ccache --zero-stats ||:

 # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
 ninja $NINJA_FLAGS clickhouse-bundle
 mv ./programs/clickhouse* /output

From 99b8e9e93d9a84c965db9bad429754da651222e4 Mon Sep 17 00:00:00 2001
From: Roman Bug
Date: Wed, 31 Mar 2021 18:18:43 +0300
Subject: [PATCH 014/152] Update docs/en/operations/update.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/operations/update.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index 97f2bb07d2a..22995705af9 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -16,7 +16,7 @@ $ sudo service clickhouse-server restart
 If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method.

 !!! note "Note"
-    You can update multiple servers at once as soon as there is no moment of time when all replicas of one shards are offline.
+    You can update multiple servers at once as soon as there is no moment when all replicas of one shard are offline.

 The upgrade of older version of ClickHouse to specific version:

@@ -32,4 +32,3 @@ $ sudo service clickhouse-server restart

-

From c15d7e009d2f3c759ce5917964073bb3a0d36c8d Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 31 Mar 2021 18:20:30 +0300
Subject: [PATCH 015/152] Some initial code

---
 src/Storages/MergeTree/IMergeTreeDataPart.cpp | 19 +++++
 .../MergeTree/MergeTreeDeduplicationLog.cpp   | 78 +++++++++++++++++++
 .../MergeTree/MergeTreeDeduplicationLog.h     | 57 ++++++++++++++
 3 files changed, 154 insertions(+)
 create mode 100644 src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
 create mode 100644 src/Storages/MergeTree/MergeTreeDeduplicationLog.h

diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 7c9f7b8104d..d59d877b372 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1349,6 +1349,24 @@ String IMergeTreeDataPart::getUniqueId() const
     return id;
 }

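+/// The block ID of a zero-level part is its partition ID plus the 128-bit
+/// hash of its data, so two inserts carrying identical data into the same
+/// partition produce the same ID; this is what insert deduplication keys on.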
+String IMergeTreeDataPart::getZeroLevelPartBlockID() const
+{
+    if (info.level != 0)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name);
+
+    SipHash hash;
+    checksums.computeTotalChecksumDataOnly(hash);
+    union
+    {
+        char bytes[16];
+        UInt64 words[2];
+    } hash_value;
+    hash.get128(hash_value.bytes);
+
+    return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]);
+}
+
 bool isCompactPart(const MergeTreeDataPartPtr & data_part)
 {
     return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
@@ -1364,5 +1382,6 @@ bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
     return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY);
 }

+
 }
diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
new file mode 100644
index 00000000000..7c6f6f6db9a
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
@@ -0,0 +1,78 @@
+#include <Storages/MergeTree/MergeTreeDeduplicationLog.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <boost/algorithm/string.hpp>
+
+
+
+namespace DB
+{
+
+namespace
+{
+
+std::string getLogPath(const std::string & prefix, size_t number)
+{
+    std::filesystem::path path(prefix);
+    path /= std::filesystem::path(std::string{"deduplication_log_"} + std::to_string(number) + ".txt");
+    return path;
+}
+
+size_t getLogNumber(const std::string & path_str)
+{
+    std::filesystem::path path(path_str);
+    std::string filename = path.stem();
+    Strings filename_parts;
+    boost::split(filename_parts, filename, boost::is_any_of("_"));
+
+    return parse<size_t>(filename_parts[2]);
+}
+
+}
+
+MergeTreeDeduplicationLog::MergeTreeDeduplicationLog(
+    const std::string & logs_dir_,
+    size_t deduplication_window_,
+    size_t rotate_interval_)
+    : logs_dir(logs_dir_)
+    , deduplication_window(deduplication_window_)
+    , rotate_interval(rotate_interval_)
+{}
+
+void MergeTreeDeduplicationLog::load()
+{
+    namespace fs = std::filesystem;
+    if (!fs::exists(logs_dir))
+        fs::create_directories(logs_dir);
+
+    for (const auto & p : fs::directory_iterator(logs_dir))
+    {
+        auto path = p.path();
+        auto log_number = getLogNumber(path);
+        existing_logs[log_number] = {path, 0};
+    }
+}
+
+std::unordered_set<std::string> MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) const
+{
+    std::unordered_set<std::string> result;
+    ReadBufferFromFile read_buf(path);
+
+    while (!read_buf.eof())
+    {
+        UInt64 record_checksum;
+        readIntBinary(record_checksum, read_buf);
+    }
+
+    return result;
+}
+
+void MergeTreeDeduplicationLog::rotate()
+{
+    size_t new_log_number = log_counter++;
+    MergeTreeDeduplicationLogNameDescription new_description{getLogPath(logs_dir, new_log_number), 0};
+    existing_logs.emplace(new_log_number, new_description);
+    current_writer->sync();
+
+    current_writer = std::make_unique<WriteBufferFromFile>(new_description.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h
new file mode 100644
index 00000000000..140e1c80be7
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h
@@ -0,0 +1,57 @@
+#pragma once
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <IO/WriteBufferFromFile.h>
+#include <unordered_set>
+
+namespace DB
+{
+
+enum class MergeTreeDeduplicationOp : uint8_t
+{
+    ADD = 1,
+    DROP = 2,
+};
+
+struct MergeTreeDeduplicationLogRecord
+{
+    MergeTreeDeduplicationOp operation;
+    std::string part_name;
+    std::string block_id;
+};
+
+struct MergeTreeDeduplicationLogNameDescription
+{
+    std::string path;
+    size_t entries_count;
+};
+
+class MergeTreeDeduplicationLog
+{
+public:
+    MergeTreeDeduplicationLog(
+        const std::string & logs_dir_,
+        size_t deduplication_window_,
+        size_t rotate_interval_);
+
+    bool addPart(const MergeTreeData::MutableDataPartPtr & part);
+    void dropPart(const MergeTreeData::MutableDataPartPtr & part);
+    void dropPartition(const std::string & partition_id);
+
+    void load();
+private:
+    const std::string logs_dir;
+    const size_t deduplication_window;
+    const size_t rotate_interval;
+    size_t log_counter = 1;
+    std::map<size_t, MergeTreeDeduplicationLogNameDescription> existing_logs;
+
+    std::unordered_set<std::string> deduplication_set;
+
+    std::unique_ptr<WriteBufferFromFile> current_writer;
+    size_t entries_written_in_current_file;
+
+    void rotate();
+    std::unordered_set<std::string> loadSingleLog(const std::string & path) const;
+};
+
+}

From 1683c3a10ddb03ea866797f483762f362a79a41d Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Wed, 31 Mar 2021 18:38:36 +0300
Subject: [PATCH 016/152] more debug

---
 docker/packager/binary/build.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index 12e6a4f38de..0fe6a00b13d 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -11,6 +11,9 @@ tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolc
 mkdir -p build/cmake/toolchain/freebsd-x86_64
 tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1

+export 
CCACHE_LOGFILE=/output/ccache.log +export CCACHE_DEBUG=1 + mkdir -p build/build_docker cd build/build_docker ccache --show-stats ||: @@ -23,17 +26,22 @@ cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUI # FIXME Check how ccache is used for contribs. The contrib/all target doesn't build successfully, but we don't care. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS contrib/all ||: +ninja $NINJA_FLAGS --verbose contrib/all ||: ccache --show-stats ||: ccache --zero-stats ||: # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS clickhouse-bundle +ninja $NINJA_FLAGS --verbose clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so.*' -print -exec mv '{}' /output \; +mkdir /output/ccache +find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; +tar -czvf "/output/ccache.tgz" /output/ccache +rm -rf /output/ccache + # Different files for performance test. if [ "performance" == "$COMBINED_OUTPUT" ] then From d0db789196906d9e41d2d5207b9505dcb30d029f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Mar 2021 18:43:31 +0300 Subject: [PATCH 017/152] Update build.sh --- docker/packager/binary/build.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 0fe6a00b13d..db293b9eca1 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -32,16 +32,14 @@ ccache --zero-stats ||: # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. ninja $NINJA_FLAGS --verbose clickhouse-bundle + +ccache --show-stats ||: + mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so.*' -print -exec mv '{}' /output \; -mkdir /output/ccache -find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; -tar -czvf "/output/ccache.tgz" /output/ccache -rm -rf /output/ccache - # Different files for performance test. if [ "performance" == "$COMBINED_OUTPUT" ] then @@ -84,4 +82,9 @@ then rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output fi -ccache --show-stats ||: + +mkdir /output/ccache +find . 
-name '*.ccache-*' -print -exec mv '{}' /output/ccache \; +tar -czvf "/output/ccache.tgz" /output/ccache +rm -rf /output/ccache + From 84292e16fd39ee563f171417ae53d2c8e07ee90e Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 31 Mar 2021 22:37:48 +0300 Subject: [PATCH 018/152] Update settings.md --- .../ru/operations/server-configuration-parameters/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 9f0508cb821..42849a86e1d 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1,6 +1,6 @@ --- toc_priority: 57 -toc_title: "\u041a\u043e\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0435\u0020\u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u044b\u0020\u0441\u0435\u0440\u0432\u0435\u0440\u0430" +toc_title: "Конфигурационные параметры сервера" --- # Конфигурационные параметры сервера {#server-configuration-parameters-reference} @@ -309,7 +309,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## interserver_https_host {#interserver-https-host} -Имя реплики, которое могут использовать другие реплики для обращения к ней по протоколу `HTTPS`. +Имя хоста, которое могут использовать другие реплики для обращения к ней по протоколу `HTTPS`. **Пример** From 1f454d2cdec2025b955320a1371ad12d4ad1cdef Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 31 Mar 2021 22:40:28 +0300 Subject: [PATCH 019/152] Update settings.md --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 42849a86e1d..eb113be3a03 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -309,7 +309,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## interserver_https_host {#interserver-https-host} -Имя хоста, которое могут использовать другие реплики для обращения к ней по протоколу `HTTPS`. +Имя хоста, которое могут использовать другие реплики для обращения к нему по протоколу `HTTPS`. **Пример** From a55d03cb62f88913166f78e25d2d0fe68b5cc019 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 31 Mar 2021 22:41:53 +0300 Subject: [PATCH 020/152] try to avoid conflict with prlimit --- CMakeLists.txt | 11 +++++++++-- cmake/find/ccache.cmake | 4 +++- docker/packager/binary/build.sh | 4 ++++ docker/packager/packager | 1 + 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6eaa9f7731..fe617579b74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,14 @@ endif () include (cmake/find/ccache.cmake) -option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF) +# Take care to add prlimit in command line before ccache, or else ccache thinks that +# prlimit is compiler, and clang++ is its input file, and refuses to work with +# multiple inputs, e.g in ccache log: +# [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... 
std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp +# +# [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp + +option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling." OFF) if (ENABLE_CHECK_HEAVY_BUILDS) # set DATA (since RSS does not work since 2.6.x+) to 2G set (RLIMIT_DATA 5000000000) @@ -76,7 +83,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS) if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) set (RLIMIT_DATA 10000000000) endif() - set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600) + set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600 ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index fea1f8b4c97..986c9cb5fe2 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -32,7 +32,9 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) + set (CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_CXX_COMPILER_LAUNCHER}) + set (CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_C_COMPILER_LAUNCHER}) + set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index db293b9eca1..ce47df9b39c 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -88,3 +88,7 @@ find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; tar -czvf "/output/ccache.tgz" /output/ccache rm -rf /output/ccache +# Compress the log as well, or else the CI will try to compress all log files in place, +# and will fail because this directory is not writable. +gzip "/output/ccache.log" + diff --git a/docker/packager/packager b/docker/packager/packager index 65c03cc10e3..cf62d96d681 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -155,6 +155,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if clang_tidy: cmake_flags.append('-DENABLE_CLANG_TIDY=1') + cmake_flags.append('-D # Don't stop on first error to find more clang-tidy errors in one run. result.append('NINJA_FLAGS=-k0') From 8bd19ccad2fc24fe4efe5ec46f3f98dca1cce7e9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Mar 2021 23:23:08 +0300 Subject: [PATCH 021/152] Update packager --- docker/packager/packager | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index cf62d96d681..65c03cc10e3 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -155,7 +155,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if clang_tidy: cmake_flags.append('-DENABLE_CLANG_TIDY=1') - cmake_flags.append('-D # Don't stop on first error to find more clang-tidy errors in one run. 
result.append('NINJA_FLAGS=-k0') From a91f5058995c39f96ecc0027416623ca9358eac3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 00:54:45 +0300 Subject: [PATCH 022/152] remove debug --- CMakeLists.txt | 4 ++- docker/packager/binary/Dockerfile | 43 ++++++++++++++++--------------- docker/packager/binary/build.sh | 33 ++++++++++++------------ 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fe617579b74..6aa136c51b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,9 @@ include (cmake/find/ccache.cmake) # [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp # # [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp - +# +# Another way would be to use --ccache-skip option before clang++-11 to make +# ccache ignore it. option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling." OFF) if (ENABLE_CHECK_HEAVY_BUILDS) # set DATA (since RSS does not work since 2.6.x+) to 2G diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 94c7f934f6e..97ef70aeaa9 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -35,35 +35,36 @@ RUN apt-get update \ RUN apt-get update \ && apt-get install \ bash \ - cmake \ + build-essential \ ccache \ - curl \ - gcc-9 \ - g++-9 \ clang-10 \ - clang-tidy-10 \ - lld-10 \ - llvm-10 \ - llvm-10-dev \ clang-11 \ + clang-tidy-10 \ clang-tidy-11 \ - lld-11 \ - llvm-11 \ - llvm-11-dev \ + cmake \ + cmake \ + curl \ + g++-9 \ + gcc-9 \ + gdb \ + git \ + gperf \ + gperf \ + intel-opencl-icd \ libicu-dev \ libreadline-dev \ + lld-10 \ + lld-11 \ + llvm-10 \ + llvm-10-dev \ + llvm-11 \ + llvm-11-dev \ + moreutils \ ninja-build \ - gperf \ - git \ - opencl-headers \ ocl-icd-libopencl1 \ - intel-opencl-icd \ - tzdata \ - gperf \ - cmake \ - gdb \ + opencl-headers \ rename \ - build-essential \ + tzdata \ --yes --no-install-recommends # This symlink required by gcc to find lld compiler @@ -111,4 +112,4 @@ RUN rm /etc/apt/sources.list.d/proposed-repositories.list && apt-get update COPY build.sh / -CMD ["/bin/bash", "/build.sh"] +CMD ["bash", "-c", "/build.sh 2>&1 | ts"] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index ce47df9b39c..7e83209be19 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -11,8 +11,9 @@ tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolc mkdir -p build/cmake/toolchain/freebsd-x86_64 tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 -export CCACHE_LOGFILE=/output/ccache.log -export CCACHE_DEBUG=1 +# # Uncomment to debug ccache +# export CCACHE_LOGFILE=/output/ccache.log +# export CCACHE_DEBUG=1 mkdir -p build/build_docker cd build/build_docker @@ -24,14 +25,8 @@ rm -f CMakeCache.txt read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" 
-DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. -# FIXME Check how ccache is used for contribs. The contrib/all target doesn't build successfully, but we don't care. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS --verbose contrib/all ||: -ccache --show-stats ||: -ccache --zero-stats ||: - -# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS --verbose clickhouse-bundle +ninja $NINJA_FLAGS clickhouse-bundle ccache --show-stats ||: @@ -83,12 +78,18 @@ then mv "$COMBINED_OUTPUT.tgz" /output fi -mkdir /output/ccache -find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; -tar -czvf "/output/ccache.tgz" /output/ccache -rm -rf /output/ccache +if [ "${CCACHE_DEBUG:-}" == "1"] +then + mkdir /output/ccache + find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; + tar -czvf "/output/ccache.tgz" /output/ccache + rm -rf /output/ccache +fi -# Compress the log as well, or else the CI will try to compress all log files in place, -# and will fail because this directory is not writable. -gzip "/output/ccache.log" +if ! [ -z "$CCACHE_LOGFILE" ] +then + # Compress the log as well, or else the CI will try to compress all log + # files in place, and will fail because this directory is not writable. + gzip "$CCACHE_LOGFILE" +fi From 026dab2b81d815c8ac621f68161e1510f7661581 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 02:51:47 +0300 Subject: [PATCH 023/152] typo --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 7e83209be19..ea2d02d3090 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -78,7 +78,7 @@ then mv "$COMBINED_OUTPUT.tgz" /output fi -if [ "${CCACHE_DEBUG:-}" == "1"] +if [ "${CCACHE_DEBUG:-}" == "1" ] then mkdir /output/ccache find . 
-name '*.ccache-*' -print -exec mv '{}' /output/ccache \; From 2b975870558612af77690664565c767f94c09c58 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 02:53:22 +0300 Subject: [PATCH 024/152] debug again --- docker/packager/binary/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index ea2d02d3090..108211d0f01 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -11,9 +11,9 @@ tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolc mkdir -p build/cmake/toolchain/freebsd-x86_64 tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 -# # Uncomment to debug ccache -# export CCACHE_LOGFILE=/output/ccache.log -# export CCACHE_DEBUG=1 +# Uncomment to debug ccache +export CCACHE_LOGFILE=/output/ccache.log +export CCACHE_DEBUG=1 mkdir -p build/build_docker cd build/build_docker From 7c8f54e69440a36a72ba9da951f34fe76d0cf314 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 1 Apr 2021 11:07:56 +0300 Subject: [PATCH 025/152] More changes --- src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +++ .../ReplicatedMergeTreeBlockOutputStream.cpp | 11 +---------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index b64022d2b5a..61364e642d7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -164,6 +164,9 @@ public: bool isEmpty() const { return rows_count == 0; } + /// Compute part block id for zero level part. Otherwise throws an exception. + String getZeroLevelPartBlockID() const; + const MergeTreeData & storage; String name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 6f90d9f00a9..1fcce21cb41 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -153,18 +153,9 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) if (deduplicate) { - SipHash hash; - part->checksums.computeTotalChecksumDataOnly(hash); - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. 
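        /// (Concretely, the deduplication ID has the shape
        /// "<partition_id>_<word0>_<word1>", the two words being the halves of
        /// the 128-bit SipHash computed above over the part's checksum data,
        /// e.g. a hypothetical "202104_123456_654321" (digits illustrative);
        /// inserting identical data into the same partition therefore
        /// reproduces the same ID. The change below only moves this
        /// computation behind the new IMergeTreeDataPart::getZeroLevelPartBlockID()
        /// helper, which the non-replicated write path added later in this
        /// series ("Idea code") calls as well.)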
- block_id = part->info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]); + block_id = part->getZeroLevelPartBlockID(); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows", block_id, current_block.block.rows()); } From 5476e68d6c284ef9ef92c42de7c4ec30c8807ac3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 2 Mar 2021 15:28:09 +0300 Subject: [PATCH 026/152] Fix joined table access with Merge engine and aggregation --- src/Interpreters/IdentifierSemantic.cpp | 61 ++++++++++++++++ src/Interpreters/IdentifierSemantic.h | 48 +++++++++++- .../getHeaderForProcessingStage.cpp | 28 +++++-- .../getHeaderForProcessingStage.h | 3 +- src/Storages/StorageMerge.cpp | 73 ++++++++++++------- ...83_merge_table_join_and_group_by.reference | 1 + .../01483_merge_table_join_and_group_by.sql | 5 ++ 7 files changed, 183 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index a1fc533eb7f..716cf645f71 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -249,4 +249,65 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat } } +IdentifiersCollector::ASTIdentifiers IdentifiersCollector::collect(const ASTPtr & node) +{ + IdentifiersCollector::Data ident_data; + ConstInDepthNodeVisitor ident_visitor(ident_data); + ident_visitor.visit(node); + return ident_data.idents; +} + +bool IdentifiersCollector::needChildVisit(const ASTPtr &, const ASTPtr &) +{ + return true; +} + +void IdentifiersCollector::visit(const ASTPtr & node, IdentifiersCollector::Data & data) +{ + if (const auto * ident = node->as()) + data.idents.push_back(ident); +} + +IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context) +{ + if (ASTPtr with = select.with()) + QueryAliasesNoSubqueriesVisitor(aliases).visit(with); + QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); + + tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context); +} + + +std::optional IdentifierMembershipCollector::getIdentsMembership( + const ASTPtr ast, const std::vector & tables, const Aliases & aliases) +{ + auto idents = IdentifiersCollector::collect(ast); + + std::optional result; + for (const auto * ident : idents) + { + /// short name clashes with alias, ambiguous case + if (ident->isShort() && aliases.count(ident->shortName())) + return {}; + const auto pos = getIdentMembership(*ident, tables); + if (!pos) + return {}; + /// identifiers from different tables + if (result && *pos != *result) + return {}; + result = pos; + } + return result; +} + +std::optional +IdentifierMembershipCollector::getIdentMembership(const ASTIdentifier & ident, const std::vector & tables) +{ + + std::optional table_pos = IdentifierSemantic::getMembership(ident); + if (table_pos) + return table_pos; + return IdentifierSemantic::chooseTableColumnMatch(ident, tables); +} + } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 80b55ba0537..89f6c27a74c 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -2,8 +2,15 @@ #include -#include +#include #include +#include +#include +#include +#include + +#include +#include namespace DB { @@ -64,4 +71,43 @@ private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); }; +/// Collect all identifies from AST recursively +class IdentifiersCollector +{ 
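    /// (How the members below cooperate, for readers unfamiliar with the
    /// visitor helpers: collect() instantiates a ConstInDepthNodeVisitor over
    /// this class, needChildVisit() makes the traversal unconditionally
    /// recursive, and visit() appends every ASTIdentifier it encounters to
    /// Data::idents, so callers receive the identifiers in depth-first AST
    /// order.)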
+public: + using ASTIdentPtr = const ASTIdentifier *; + using ASTIdentifiers = std::vector; + struct Data + { + ASTIdentifiers idents; + }; + + static void visit(const ASTPtr & node, Data & data); + static bool needChildVisit(const ASTPtr &, const ASTPtr &); + static ASTIdentifiers collect(const ASTPtr & node); +}; + +/// Collect identifier table membership considering aliases +class IdentifierMembershipCollector +{ +public: + IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context); + + std::optional getIdentsMembership(const ASTPtr ast) const + { + return IdentifierMembershipCollector::getIdentsMembership(ast, tables, aliases); + } + + /// Collect common table membership for identifiers in expression + /// If membership cannot be established or there are several identifies from different tables, return empty optional + static std::optional + getIdentsMembership(const ASTPtr ast, const std::vector & tables, const Aliases & aliases); + +private: + std::vector tables; + Aliases aliases; + + static std::optional getIdentMembership(const ASTIdentifier & ident, const std::vector & tables); +}; + } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index b56b90cdf3f..3b39fe2e9c5 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -1,8 +1,9 @@ -#include -#include -#include #include +#include +#include +#include #include +#include namespace DB { @@ -13,7 +14,7 @@ namespace ErrorCodes } /// Rewrite original query removing joined tables from it -bool removeJoin(ASTSelectQuery & select) +bool removeJoin(ASTSelectQuery & select, const IdentifierMembershipCollector & membership_collector) { const auto & tables = select.tables(); if (!tables || tables->children.size() < 2) @@ -23,8 +24,22 @@ bool removeJoin(ASTSelectQuery & select) if (!joined_table.table_join) return false; + /// We need to remove joined columns and related functions (taking in account aliases if any). + auto * select_list = select.select()->as(); + if (select_list) + { + ASTs new_children; + for (const auto & elem : select_list->children) + { + auto table_no = membership_collector.getIdentsMembership(elem); + if (!table_no.has_value() || *table_no < 1) + new_children.push_back(elem); + } + + select_list->children = std::move(new_children); + } + /// The most simple temporary solution: leave only the first table in query. - /// TODO: we also need to remove joined columns and related functions (taking in account aliases if any). 
tables->children.resize(1); return true; } @@ -66,7 +81,8 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::MAX: { auto query = query_info.query->clone(); - removeJoin(*query->as()); + auto & select = *query->as(); + removeJoin(select, IdentifierMembershipCollector{select, context}); auto stream = std::make_shared( metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID())); diff --git a/src/Interpreters/getHeaderForProcessingStage.h b/src/Interpreters/getHeaderForProcessingStage.h index ec238edf774..fc856ed4b62 100644 --- a/src/Interpreters/getHeaderForProcessingStage.h +++ b/src/Interpreters/getHeaderForProcessingStage.h @@ -13,8 +13,9 @@ using StorageMetadataPtr = std::shared_ptr; struct SelectQueryInfo; class Context; class ASTSelectQuery; +class IdentifierMembershipCollector; -bool removeJoin(ASTSelectQuery & select); +bool removeJoin(ASTSelectQuery & select, const IdentifierMembershipCollector & membership_collector); Block getHeaderForProcessingStage( const IStorage & storage, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b8aaa52f92c..e29a99ff4c0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,31 +1,34 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include #include #include +#include +#include +#include +#include +#include +#include namespace DB { @@ -43,9 +46,12 @@ namespace ErrorCodes namespace { -void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result) +TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result, const Context & context) { - if (removeJoin(select)) + IdentifierMembershipCollector membership_collector{select, context}; + + TreeRewriterResult new_rewriter_result = rewriter_result; + if (removeJoin(select, membership_collector)) { /// Also remove GROUP BY cause ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed. select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); @@ -62,7 +68,17 @@ void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_r select.setExpression(ASTSelectQuery::Expression::PREWHERE, {}); select.setExpression(ASTSelectQuery::Expression::HAVING, {}); select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); + + new_rewriter_result.aggregates.clear(); + for (const auto & agg : rewriter_result.aggregates) + { + auto table_no = membership_collector.getIdentsMembership(std::make_shared(*agg)); + if (!table_no.has_value() || *table_no < 1) + new_rewriter_result.aggregates.push_back(agg); + } } + + return new_rewriter_result; } } @@ -159,7 +175,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & /// (see modifySelect()/removeJoin()) /// /// And for this we need to return FetchColumns. 
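    /// (QueryProcessingStage::FetchColumns is the earliest stage: the
    /// underlying tables only read the requested columns, and the JOIN plus
    /// any aggregation then run on top of that stream on the initiator, so
    /// nothing downstream depends on the columns the rewrite strips out.)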
- if (removeJoin(modified_select)) + if (removeJoin(modified_select, IdentifierMembershipCollector{modified_select, context})) return QueryProcessingStage::FetchColumns; auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -303,8 +319,9 @@ Pipe StorageMerge::createSources( modified_query_info.query = query_info.query->clone(); /// Original query could contain JOIN but we need only the first joined table and its columns. - auto & modified_select = modified_query_info.query->as(); - modifySelect(modified_select, *query_info.syntax_analyzer_result); + auto & modified_select = modified_query_info.query->as();\ + auto new_analyzer_res = modifySelect(modified_select, *query_info.syntax_analyzer_result, *modified_context); + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name); diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference index b2c3ea56b7f..f0dd7a31380 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference @@ -5,3 +5,4 @@ 1 0 1 0 1 +1 diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql index a6678ca9040..9b8395b5565 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql @@ -17,6 +17,11 @@ SELECT ID FROM m INNER JOIN b USING(key) GROUP BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 HAVING ID = 1 ORDER BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 GROUP BY ID, key HAVING ID = 1 ORDER BY ID; +SELECT sum(b.ID) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; + +-- still not working because columns from different table under aggregation +SELECT sum(b.ID + m.key) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; -- { serverError 47 } + DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; DROP TABLE IF EXISTS m; From fff8043e558bea8a45dfbc449f28347260d5128e Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 1 Apr 2021 14:31:46 +0300 Subject: [PATCH 027/152] Revert "Fix joined table access with Merge engine and aggregation" This reverts commit 5476e68d6c284ef9ef92c42de7c4ec30c8807ac3. 
--- src/Interpreters/IdentifierSemantic.cpp | 61 ------------------ src/Interpreters/IdentifierSemantic.h | 48 +------------- .../getHeaderForProcessingStage.cpp | 28 ++------- .../getHeaderForProcessingStage.h | 3 +- src/Storages/StorageMerge.cpp | 63 +++++++------------ ...83_merge_table_join_and_group_by.reference | 1 - .../01483_merge_table_join_and_group_by.sql | 5 -- 7 files changed, 31 insertions(+), 178 deletions(-) diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 716cf645f71..a1fc533eb7f 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -249,65 +249,4 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat } } -IdentifiersCollector::ASTIdentifiers IdentifiersCollector::collect(const ASTPtr & node) -{ - IdentifiersCollector::Data ident_data; - ConstInDepthNodeVisitor ident_visitor(ident_data); - ident_visitor.visit(node); - return ident_data.idents; -} - -bool IdentifiersCollector::needChildVisit(const ASTPtr &, const ASTPtr &) -{ - return true; -} - -void IdentifiersCollector::visit(const ASTPtr & node, IdentifiersCollector::Data & data) -{ - if (const auto * ident = node->as()) - data.idents.push_back(ident); -} - -IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context) -{ - if (ASTPtr with = select.with()) - QueryAliasesNoSubqueriesVisitor(aliases).visit(with); - QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); - - tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context); -} - - -std::optional IdentifierMembershipCollector::getIdentsMembership( - const ASTPtr ast, const std::vector & tables, const Aliases & aliases) -{ - auto idents = IdentifiersCollector::collect(ast); - - std::optional result; - for (const auto * ident : idents) - { - /// short name clashes with alias, ambiguous case - if (ident->isShort() && aliases.count(ident->shortName())) - return {}; - const auto pos = getIdentMembership(*ident, tables); - if (!pos) - return {}; - /// identifiers from different tables - if (result && *pos != *result) - return {}; - result = pos; - } - return result; -} - -std::optional -IdentifierMembershipCollector::getIdentMembership(const ASTIdentifier & ident, const std::vector & tables) -{ - - std::optional table_pos = IdentifierSemantic::getMembership(ident); - if (table_pos) - return table_pos; - return IdentifierSemantic::chooseTableColumnMatch(ident, tables); -} - } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 89f6c27a74c..80b55ba0537 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -2,15 +2,8 @@ #include -#include -#include -#include -#include -#include -#include - #include -#include +#include namespace DB { @@ -71,43 +64,4 @@ private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); }; -/// Collect all identifies from AST recursively -class IdentifiersCollector -{ -public: - using ASTIdentPtr = const ASTIdentifier *; - using ASTIdentifiers = std::vector; - struct Data - { - ASTIdentifiers idents; - }; - - static void visit(const ASTPtr & node, Data & data); - static bool needChildVisit(const ASTPtr &, const ASTPtr &); - static ASTIdentifiers collect(const ASTPtr & node); -}; - -/// Collect identifier table membership considering aliases -class IdentifierMembershipCollector -{ -public: - 
IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context); - - std::optional getIdentsMembership(const ASTPtr ast) const - { - return IdentifierMembershipCollector::getIdentsMembership(ast, tables, aliases); - } - - /// Collect common table membership for identifiers in expression - /// If membership cannot be established or there are several identifies from different tables, return empty optional - static std::optional - getIdentsMembership(const ASTPtr ast, const std::vector & tables, const Aliases & aliases); - -private: - std::vector tables; - Aliases aliases; - - static std::optional getIdentMembership(const ASTIdentifier & ident, const std::vector & tables); -}; - } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 3b39fe2e9c5..b56b90cdf3f 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -1,9 +1,8 @@ -#include -#include -#include #include -#include +#include #include +#include +#include namespace DB { @@ -14,7 +13,7 @@ namespace ErrorCodes } /// Rewrite original query removing joined tables from it -bool removeJoin(ASTSelectQuery & select, const IdentifierMembershipCollector & membership_collector) +bool removeJoin(ASTSelectQuery & select) { const auto & tables = select.tables(); if (!tables || tables->children.size() < 2) @@ -24,22 +23,8 @@ bool removeJoin(ASTSelectQuery & select, const IdentifierMembershipCollector & m if (!joined_table.table_join) return false; - /// We need to remove joined columns and related functions (taking in account aliases if any). - auto * select_list = select.select()->as(); - if (select_list) - { - ASTs new_children; - for (const auto & elem : select_list->children) - { - auto table_no = membership_collector.getIdentsMembership(elem); - if (!table_no.has_value() || *table_no < 1) - new_children.push_back(elem); - } - - select_list->children = std::move(new_children); - } - /// The most simple temporary solution: leave only the first table in query. + /// TODO: we also need to remove joined columns and related functions (taking in account aliases if any). 
tables->children.resize(1); return true; } @@ -81,8 +66,7 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::MAX: { auto query = query_info.query->clone(); - auto & select = *query->as(); - removeJoin(select, IdentifierMembershipCollector{select, context}); + removeJoin(*query->as()); auto stream = std::make_shared( metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID())); diff --git a/src/Interpreters/getHeaderForProcessingStage.h b/src/Interpreters/getHeaderForProcessingStage.h index fc856ed4b62..ec238edf774 100644 --- a/src/Interpreters/getHeaderForProcessingStage.h +++ b/src/Interpreters/getHeaderForProcessingStage.h @@ -13,9 +13,8 @@ using StorageMetadataPtr = std::shared_ptr; struct SelectQueryInfo; class Context; class ASTSelectQuery; -class IdentifierMembershipCollector; -bool removeJoin(ASTSelectQuery & select, const IdentifierMembershipCollector & membership_collector); +bool removeJoin(ASTSelectQuery & select); Block getHeaderForProcessingStage( const IStorage & storage, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e29a99ff4c0..b8aaa52f92c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,34 +1,31 @@ -#include -#include - -#include -#include -#include -#include #include -#include -#include +#include +#include +#include +#include +#include #include -#include -#include -#include #include +#include #include +#include #include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include -#include namespace DB { @@ -46,12 +43,9 @@ namespace ErrorCodes namespace { -TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result, const Context & context) +void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result) { - IdentifierMembershipCollector membership_collector{select, context}; - - TreeRewriterResult new_rewriter_result = rewriter_result; - if (removeJoin(select, membership_collector)) + if (removeJoin(select)) { /// Also remove GROUP BY cause ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed. select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); @@ -68,17 +62,7 @@ TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResul select.setExpression(ASTSelectQuery::Expression::PREWHERE, {}); select.setExpression(ASTSelectQuery::Expression::HAVING, {}); select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); - - new_rewriter_result.aggregates.clear(); - for (const auto & agg : rewriter_result.aggregates) - { - auto table_no = membership_collector.getIdentsMembership(std::make_shared(*agg)); - if (!table_no.has_value() || *table_no < 1) - new_rewriter_result.aggregates.push_back(agg); - } } - - return new_rewriter_result; } } @@ -175,7 +159,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & /// (see modifySelect()/removeJoin()) /// /// And for this we need to return FetchColumns. 
- if (removeJoin(modified_select, IdentifierMembershipCollector{modified_select, context})) + if (removeJoin(modified_select)) return QueryProcessingStage::FetchColumns; auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -319,9 +303,8 @@ Pipe StorageMerge::createSources( modified_query_info.query = query_info.query->clone(); /// Original query could contain JOIN but we need only the first joined table and its columns. - auto & modified_select = modified_query_info.query->as();\ - auto new_analyzer_res = modifySelect(modified_select, *query_info.syntax_analyzer_result, *modified_context); - modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + auto & modified_select = modified_query_info.query->as(); + modifySelect(modified_select, *query_info.syntax_analyzer_result); VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name); diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference index f0dd7a31380..b2c3ea56b7f 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference @@ -5,4 +5,3 @@ 1 0 1 0 1 -1 diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql index 9b8395b5565..a6678ca9040 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql @@ -17,11 +17,6 @@ SELECT ID FROM m INNER JOIN b USING(key) GROUP BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 HAVING ID = 1 ORDER BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 GROUP BY ID, key HAVING ID = 1 ORDER BY ID; -SELECT sum(b.ID) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; - --- still not working because columns from different table under aggregation -SELECT sum(b.ID + m.key) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; -- { serverError 47 } - DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; DROP TABLE IF EXISTS m; From 3f464595ebcb87cb67886396c3a2470dc56897e6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 1 Apr 2021 14:21:36 +0300 Subject: [PATCH 028/152] Accurate removing 'join' part for queries to 'Merge' engine --- src/Interpreters/CrossToInnerJoinVisitor.cpp | 34 +--------- src/Interpreters/IdentifierSemantic.cpp | 64 +++++++++++++++++++ src/Interpreters/IdentifierSemantic.h | 45 ++++++++++++- src/Interpreters/TreeRewriter.cpp | 2 +- .../getHeaderForProcessingStage.cpp | 22 ++++--- .../getHeaderForProcessingStage.h | 1 + src/Storages/StorageMerge.cpp | 56 +++++++++++++--- ...83_merge_table_join_and_group_by.reference | 2 + .../01483_merge_table_join_and_group_by.sql | 3 + 9 files changed, 178 insertions(+), 51 deletions(-) diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 2fcd75b1f23..c8195706f04 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -131,36 +131,6 @@ std::vector collectConjunctions(const ASTPtr & node) return members; } -std::optional getIdentMembership(const ASTIdentifier & ident, const std::vector & tables) -{ - std::optional table_pos = IdentifierSemantic::getMembership(ident); - if (table_pos) - return table_pos; - return IdentifierSemantic::chooseTableColumnMatch(ident, tables, true); -} 
- -std::optional getIdentsMembership(const ASTPtr ast, - const std::vector & tables, - const Aliases & aliases) -{ - auto idents = IdentifiersCollector::collect(ast); - - std::optional result; - for (const auto * ident : idents) - { - /// Moving expressions that use column aliases is not supported. - if (ident->isShort() && aliases.count(ident->shortName())) - return {}; - const auto pos = getIdentMembership(*ident, tables); - if (!pos) - return {}; - if (result && *pos != *result) - return {}; - result = pos; - } - return result; -} - bool isAllowedToRewriteCrossJoin(const ASTPtr & node, const Aliases & aliases) { if (node->as()) @@ -193,8 +163,8 @@ std::map> moveExpressionToJoinOn( /// Check if the identifiers are from different joined tables. /// If it's a self joint, tables should have aliases. - auto left_table_pos = getIdentsMembership(func->arguments->children[0], tables, aliases); - auto right_table_pos = getIdentsMembership(func->arguments->children[1], tables, aliases); + auto left_table_pos = IdentifierSemantic::getIdentsMembership(func->arguments->children[0], tables, aliases); + auto right_table_pos = IdentifierSemantic::getIdentsMembership(func->arguments->children[1], tables, aliases); /// Identifiers from different table move to JOIN ON if (left_table_pos && right_table_pos && *left_table_pos != *right_table_pos) diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index a1fc533eb7f..ad5598afb5b 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -249,4 +249,68 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat } } +std::optional IdentifierSemantic::getIdentMembership(const ASTIdentifier & ident, const std::vector & tables) +{ + std::optional table_pos = IdentifierSemantic::getMembership(ident); + if (table_pos) + return table_pos; + return IdentifierSemantic::chooseTableColumnMatch(ident, tables, true); +} + +std::optional +IdentifierSemantic::getIdentsMembership(ASTPtr ast, const std::vector & tables, const Aliases & aliases) +{ + auto idents = IdentifiersCollector::collect(ast); + + std::optional result; + for (const auto * ident : idents) + { + /// short name clashes with alias, ambiguous + if (ident->isShort() && aliases.count(ident->shortName())) + return {}; + const auto pos = getIdentMembership(*ident, tables); + if (!pos) + return {}; + /// identifiers from different tables + if (result && *pos != *result) + return {}; + result = pos; + } + return result; +} + +IdentifiersCollector::ASTIdentifiers IdentifiersCollector::collect(const ASTPtr & node) +{ + IdentifiersCollector::Data ident_data; + ConstInDepthNodeVisitor ident_visitor(ident_data); + ident_visitor.visit(node); + return ident_data.idents; +} + +bool IdentifiersCollector::needChildVisit(const ASTPtr &, const ASTPtr &) +{ + return true; +} + +void IdentifiersCollector::visit(const ASTPtr & node, IdentifiersCollector::Data & data) +{ + if (const auto * ident = node->as()) + data.idents.push_back(ident); +} + + +IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context) +{ + if (ASTPtr with = select.with()) + QueryAliasesNoSubqueriesVisitor(aliases).visit(with); + QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); + + tables = getDatabaseAndTablesWithColumns(getTableExpressions(select), context); +} + +std::optional IdentifierMembershipCollector::getIdentsMembership(ASTPtr ast) const +{ + return 
IdentifierSemantic::getIdentsMembership(ast, tables, aliases); +} + } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 80b55ba0537..3a99150b792 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -2,8 +2,15 @@ #include -#include +#include #include +#include +#include +#include +#include + +#include +#include namespace DB { @@ -59,9 +66,45 @@ struct IdentifierSemantic static std::optional chooseTableColumnMatch(const ASTIdentifier &, const TablesWithColumns & tables, bool allow_ambiguous = false); + static std::optional getIdentMembership(const ASTIdentifier & ident, const std::vector & tables); + + /// Collect common table membership for identifiers in expression + /// If membership cannot be established or there are several identifies from different tables, return empty optional + static std::optional + getIdentsMembership(ASTPtr ast, const std::vector & tables, const Aliases & aliases); + private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); }; + +/// Collect all identifies from AST recursively +class IdentifiersCollector +{ +public: + using ASTIdentPtr = const ASTIdentifier *; + using ASTIdentifiers = std::vector; + struct Data + { + ASTIdentifiers idents; + }; + + static void visit(const ASTPtr & node, Data & data); + static bool needChildVisit(const ASTPtr &, const ASTPtr &); + static ASTIdentifiers collect(const ASTPtr & node); +}; + +/// Collect identifier table membership considering aliases +class IdentifierMembershipCollector +{ +public: + IdentifierMembershipCollector(const ASTSelectQuery & select, const Context & context); + std::optional getIdentsMembership(ASTPtr ast) const; + +private: + std::vector tables; + Aliases aliases; +}; + } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index f88fd16045a..73d7d3d004c 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -408,7 +408,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele const TablesWithColumns & tables, const Aliases & aliases, ASTPtr & new_where_conditions) { const ASTTablesInSelectQueryElement * node = select_query.join(); - if (!node) + if (!node || tables.size() < 2) return; auto & table_join = node->table_join->as(); diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index b56b90cdf3f..4de636007e0 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -12,21 +12,27 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -/// Rewrite original query removing joined tables from it -bool removeJoin(ASTSelectQuery & select) +bool hasJoin(const ASTSelectQuery & select) { const auto & tables = select.tables(); if (!tables || tables->children.size() < 2) return false; const auto & joined_table = tables->children[1]->as(); - if (!joined_table.table_join) - return false; + return joined_table.table_join != nullptr; +} - /// The most simple temporary solution: leave only the first table in query. - /// TODO: we also need to remove joined columns and related functions (taking in account aliases if any). 
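    /// (For Merge tables this TODO is now partly addressed elsewhere: the
    /// modifySelect() rewrite in StorageMerge.cpp below uses
    /// IdentifierMembershipCollector to prune WHERE/PREWHERE conjuncts and
    /// aggregates that reference anything but the leftmost table.)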
- tables->children.resize(1); - return true; +/// Rewrite original query removing joined tables from it +bool removeJoin(ASTSelectQuery & select) +{ + if (hasJoin(select)) + { + /// The most simple temporary solution: leave only the first table in query. + /// TODO: we also need to remove joined columns and related functions (taking in account aliases if any). + select.tables()->children.resize(1); + return true; + } + return false; } Block getHeaderForProcessingStage( diff --git a/src/Interpreters/getHeaderForProcessingStage.h b/src/Interpreters/getHeaderForProcessingStage.h index ec238edf774..e06d22f6b76 100644 --- a/src/Interpreters/getHeaderForProcessingStage.h +++ b/src/Interpreters/getHeaderForProcessingStage.h @@ -14,6 +14,7 @@ struct SelectQueryInfo; class Context; class ASTSelectQuery; +bool hasJoin(const ASTSelectQuery & select); bool removeJoin(ASTSelectQuery & select); Block getHeaderForProcessingStage( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b8aaa52f92c..df176bd3bcf 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -43,12 +44,15 @@ namespace ErrorCodes namespace { -void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result) +TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result, const Context & context) { + + TreeRewriterResult new_rewriter_result = rewriter_result; if (removeJoin(select)) { /// Also remove GROUP BY cause ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed. select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + new_rewriter_result.aggregates.clear(); /// Replace select list to remove joined columns auto select_list = std::make_shared(); @@ -57,12 +61,48 @@ void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_r select.setExpression(ASTSelectQuery::Expression::SELECT, select_list); - /// TODO: keep WHERE/PREWHERE. We have to remove joined columns and their expressions but keep others. 
- select.setExpression(ASTSelectQuery::Expression::WHERE, {}); - select.setExpression(ASTSelectQuery::Expression::PREWHERE, {}); + const DB::IdentifierMembershipCollector membership_collector{select, context}; + + /// Remove unknown identifiers from where, leave only ones from left table + auto replace_where = [&membership_collector](ASTSelectQuery & query, ASTSelectQuery::Expression expr) + { + auto where = query.getExpression(expr, false); + if (!where) + return; + + const size_t left_table_pos = 0; + if (const auto * conjunctions = where->as(); conjunctions && conjunctions->name == "and") + { + /// Test each argument of `and` function and select related to only left table + std::shared_ptr new_conj = makeASTFunction("and"); + for (const auto & node : conjunctions->arguments->children) + { + if (membership_collector.getIdentsMembership(node) == left_table_pos) + new_conj->arguments->children.push_back(std::move(node)); + } + if (new_conj->arguments->children.empty()) + /// No identifiers from left table + query.setExpression(expr, {}); + else if (new_conj->arguments->children.size() == 1) + /// Only one expression, lift from `and` + query.setExpression(expr, std::move(new_conj->arguments->children[0])); + else + /// Set new expression + query.setExpression(expr, std::move(new_conj)); + } + else + { + /// Remove whole expression if not match to left table + if (membership_collector.getIdentsMembership(where) != left_table_pos) + query.setExpression(expr, {}); + } + }; + replace_where(select,ASTSelectQuery::Expression::WHERE); + replace_where(select,ASTSelectQuery::Expression::PREWHERE); select.setExpression(ASTSelectQuery::Expression::HAVING, {}); select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); } + return new_rewriter_result; } } @@ -150,8 +190,6 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { - ASTPtr modified_query = query_info.query->clone(); - auto & modified_select = modified_query->as(); /// In case of JOIN the first stage (which includes JOIN) /// should be done on the initiator always. /// @@ -159,7 +197,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & /// (see modifySelect()/removeJoin()) /// /// And for this we need to return FetchColumns. - if (removeJoin(modified_select)) + if (const auto * select = query_info.query->as(); select && hasJoin(*select)) return QueryProcessingStage::FetchColumns; auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -304,7 +342,8 @@ Pipe StorageMerge::createSources( /// Original query could contain JOIN but we need only the first joined table and its columns. 
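        /// (A walk-through of the replace_where() rewrite above, using the
        /// query added by the new test in this series:
        ///
        ///     SELECT count(), X FROM foo_merge JOIN t2 USING Val
        ///     WHERE Val = 3 AND Id = 3 AND t2.X == 4 GROUP BY X;
        ///
        /// the `and` chain is split into conjuncts; those whose identifiers
        /// all resolve to table 0 (`Val = 3`, `Id = 3`) are kept, while
        /// `t2.X == 4` is dropped, so each underlying table still receives a
        /// predicate its primary key can serve, which is why the test passes
        /// under force_primary_key = 1.)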
auto & modified_select = modified_query_info.query->as(); - modifySelect(modified_select, *query_info.syntax_analyzer_result); + auto new_analyzer_res = modifySelect(modified_select, *query_info.syntax_analyzer_result, *modified_context); + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name); @@ -328,7 +367,6 @@ Pipe StorageMerge::createSources( if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(metadata_snapshot->getColumns().getAllPhysical())); - pipe = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); } else if (processed_stage > storage_stage) diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference index b2c3ea56b7f..4261ccd8a1f 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference @@ -5,3 +5,5 @@ 1 0 1 0 1 +1 0 +1 diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql index a6678ca9040..68b4e7d4015 100644 --- a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql @@ -17,6 +17,9 @@ SELECT ID FROM m INNER JOIN b USING(key) GROUP BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 HAVING ID = 1 ORDER BY ID; SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 GROUP BY ID, key HAVING ID = 1 ORDER BY ID; +SELECT sum(b.ID), sum(m.key) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; +SELECT sum(b.ID + m.key) FROM m FULL JOIN b ON (m.key == b.key) GROUP BY key; + DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; DROP TABLE IF EXISTS m; From 1c5c1946df26a6a6a139ac09c018557034604e3f Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 1 Apr 2021 14:46:05 +0300 Subject: [PATCH 029/152] Add test merge_engine_join_key_condition --- ..._merge_engine_join_key_condition.reference | 4 ++++ .../01783_merge_engine_join_key_condition.sql | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference create mode 100644 tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql diff --git a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference new file mode 100644 index 00000000000..9f7c2e7ee16 --- /dev/null +++ b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference @@ -0,0 +1,4 @@ +3 3 +1 4 +1 4 +1 4 diff --git a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql new file mode 100644 index 00000000000..97a5f2f0ef7 --- /dev/null +++ b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS foo_merge; +DROP TABLE IF EXISTS t2; + +CREATE TABLE foo(Id Int32, Val Int32) Engine=MergeTree PARTITION BY Val ORDER BY Id; +INSERT INTO foo SELECT number, number%5 FROM numbers(100000); + +CREATE TABLE foo_merge as foo ENGINE=Merge(currentDatabase(), '^foo'); + +CREATE TABLE t2 (Id Int32, Val 
Int32, X Int32) Engine=Memory; +INSERT INTO t2 values (4, 3, 4); + +SET force_primary_key = 1; + +SELECT * FROM foo_merge WHERE Val = 3 AND Id = 3; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 AND t2.X == 4 GROUP BY X; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; +SELECT count(), X FROM (SELECT * FROM foo_merge) f JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; + +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS foo_merge; +DROP TABLE IF EXISTS t2; From 40dac7b678bde9fc7978a81b12ea762361cd2b9f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 15:12:30 +0300 Subject: [PATCH 030/152] more debug --- debian/rules | 2 +- docker/packager/binary/Dockerfile | 2 ++ docker/packager/binary/build.sh | 19 +++++++++---------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/debian/rules b/debian/rules index 8eb47e95389..b30dc2ec71b 100755 --- a/debian/rules +++ b/debian/rules @@ -62,7 +62,7 @@ ifndef DISABLE_NINJA NINJA=$(shell which ninja) ifneq ($(NINJA),) CMAKE_FLAGS += -GNinja - export MAKE=$(NINJA) $(NINJA_FLAGS) + export MAKE=$(NINJA) $(NINJA_FLAGS) --verbose endif endif diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 97ef70aeaa9..6948aeb3b18 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -63,6 +63,8 @@ RUN apt-get update \ ninja-build \ ocl-icd-libopencl1 \ opencl-headers \ + pigz \ + pixz \ rename \ tzdata \ --yes --no-install-recommends diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 108211d0f01..b995adfb506 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -11,8 +11,9 @@ tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolc mkdir -p build/cmake/toolchain/freebsd-x86_64 tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 -# Uncomment to debug ccache -export CCACHE_LOGFILE=/output/ccache.log +# Uncomment to debug ccache. Don't put ccache log in /output right away, or it +# will be confusingly packed into the "performance" package. +export CCACHE_LOGFILE=/build/ccache.log export CCACHE_DEBUG=1 mkdir -p build/build_docker @@ -26,7 +27,7 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS clickhouse-bundle +ninja $NINJA_FLAGS --verbose clickhouse-bundle ccache --show-stats ||: @@ -73,23 +74,21 @@ then cp ../programs/server/config.xml /output/config cp ../programs/server/users.xml /output/config cp -r --dereference ../programs/server/config.d /output/config - tar -czvf "$COMBINED_OUTPUT.tgz" /output + tar -czvf -I pixz "$COMBINED_OUTPUT.tgz" /output rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output fi if [ "${CCACHE_DEBUG:-}" == "1" ] then - mkdir /output/ccache - find . -name '*.ccache-*' -print -exec mv '{}' /output/ccache \; - tar -czvf "/output/ccache.tgz" /output/ccache - rm -rf /output/ccache + find . -name '*.ccache-*' -print0 + | tar -czf -I pixz /output/ccache-debug.tgz -null -T - fi -if ! 
[ -z "$CCACHE_LOGFILE" ] +if [ -n "$CCACHE_LOGFILE" ] then # Compress the log as well, or else the CI will try to compress all log # files in place, and will fail because this directory is not writable. - gzip "$CCACHE_LOGFILE" + tar -czvf -I pixz /output/ccache.log.tgz "$CCACHE_LOGFILE" fi From 5f4519908208a0be03c63aea32ce816ca80dd4fb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 15:46:55 +0300 Subject: [PATCH 031/152] typo --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index b995adfb506..174d38f4c50 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -81,7 +81,7 @@ fi if [ "${CCACHE_DEBUG:-}" == "1" ] then - find . -name '*.ccache-*' -print0 + find . -name '*.ccache-*' -print0 \ | tar -czf -I pixz /output/ccache-debug.tgz -null -T - fi From a255ab179fcae6b6994806a2e95bb40145f7df12 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 17:29:51 +0300 Subject: [PATCH 032/152] typo --- docker/packager/binary/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 174d38f4c50..784fc600f8e 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -74,7 +74,7 @@ then cp ../programs/server/config.xml /output/config cp ../programs/server/users.xml /output/config cp -r --dereference ../programs/server/config.d /output/config - tar -czvf -I pixz "$COMBINED_OUTPUT.tgz" /output + tar -czv -I pixz -f "$COMBINED_OUTPUT.tgz" /output rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output fi @@ -82,13 +82,13 @@ fi if [ "${CCACHE_DEBUG:-}" == "1" ] then find . -name '*.ccache-*' -print0 \ - | tar -czf -I pixz /output/ccache-debug.tgz -null -T - + | tar -cz -I pixz -f /output/ccache-debug.tgz -null -T - fi if [ -n "$CCACHE_LOGFILE" ] then # Compress the log as well, or else the CI will try to compress all log # files in place, and will fail because this directory is not writable. - tar -czvf -I pixz /output/ccache.log.tgz "$CCACHE_LOGFILE" + tar -czv -I pixz -f /output/ccache.log.tgz "$CCACHE_LOGFILE" fi From 28dd226004cfe4919ceecb8e3c5b7ae9924bc13c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 20:00:08 +0300 Subject: [PATCH 033/152] remove extra option --- docker/packager/binary/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 784fc600f8e..495198abf9b 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -74,7 +74,7 @@ then cp ../programs/server/config.xml /output/config cp ../programs/server/users.xml /output/config cp -r --dereference ../programs/server/config.d /output/config - tar -czv -I pixz -f "$COMBINED_OUTPUT.tgz" /output + tar -cv -I pigz -f "$COMBINED_OUTPUT.tgz" /output rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output fi @@ -82,13 +82,13 @@ fi if [ "${CCACHE_DEBUG:-}" == "1" ] then find . -name '*.ccache-*' -print0 \ - | tar -cz -I pixz -f /output/ccache-debug.tgz -null -T - + | tar -c -I pixz -f /output/ccache-debug.txz -null -T - fi if [ -n "$CCACHE_LOGFILE" ] then # Compress the log as well, or else the CI will try to compress all log # files in place, and will fail because this directory is not writable. 
- tar -czv -I pixz -f /output/ccache.log.tgz "$CCACHE_LOGFILE" + tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" fi From 13f3efcc884950e060051b9d66b314bb40bfa989 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 21:02:35 +0300 Subject: [PATCH 034/152] typo --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 495198abf9b..2498700354d 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -82,7 +82,7 @@ fi if [ "${CCACHE_DEBUG:-}" == "1" ] then find . -name '*.ccache-*' -print0 \ - | tar -c -I pixz -f /output/ccache-debug.txz -null -T - + | tar -c -I pixz -f /output/ccache-debug.txz --null -T - fi if [ -n "$CCACHE_LOGFILE" ] From 4eaf585fe5ddf8abc16bfe540dd438811579fc67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 1 Apr 2021 22:49:51 +0300 Subject: [PATCH 035/152] Almost nothing added --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 7c6f6f6db9a..f4c2a44076e 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -58,10 +58,12 @@ std::unordered_set MergeTreeDeduplicationLog::loadSingleLog(const s { ReadBufferFromFile read_buf(path); + std::unordered_set result; while (!read_buf.eof()) { readIntBinary(record_checksum, read_buf); } + return result; } void MergeTreeDeduplicationLog::rotate() From a6b6e204b7013759b1a68e368b48c56a21a51950 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 1 Apr 2021 23:49:21 +0300 Subject: [PATCH 036/152] more debug --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e325d535227..b7f21ecd9dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,7 +85,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS) if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) set (RLIMIT_DATA 10000000000) endif() - set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600 ${CMAKE_CXX_COMPILER_LAUNCHER}) + set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600 ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") From eef57a1a46d8e5f61b0bd40dda6b07545050cf72 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 2 Apr 2021 01:20:58 +0300 Subject: [PATCH 037/152] increase rlimit for gcc --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7f21ecd9dd..4cee827aee3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,7 +83,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS) set (RLIMIT_AS 10000000000) # gcc10/gcc10/clang -fsanitize=memory is too heavy if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) - set (RLIMIT_DATA 10000000000) + set (RLIMIT_AS 20000000000) + set (RLIMIT_DATA 20000000000) endif() set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600 ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () From b09e15d4305f4ca20a0cadd9ff4d7bd9fd87ded5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 2 Apr 2021 03:09:14 +0300 Subject: [PATCH 038/152] increase cpu time limit for gcc --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff 
--git a/CMakeLists.txt b/CMakeLists.txt index 4cee827aee3..536f7a31aaf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,12 +81,15 @@ if (ENABLE_CHECK_HEAVY_BUILDS) set (RLIMIT_DATA 5000000000) # set VIRT (RLIMIT_AS) to 10G (DATA*10) set (RLIMIT_AS 10000000000) + # set CPU time limit to 600 seconds + set (RLIMIT_CPU 600) # gcc10/gcc10/clang -fsanitize=memory is too heavy if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) set (RLIMIT_AS 20000000000) set (RLIMIT_DATA 20000000000) + set (RLIMIT_CPU 1200) endif() - set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600 ${CMAKE_CXX_COMPILER_LAUNCHER}) + set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") From 6f3f4f821a62be853b31260677c6f0ec132b127d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 2 Apr 2021 14:00:15 +0300 Subject: [PATCH 039/152] disable prlimit altogether for gcc --- CMakeLists.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 536f7a31aaf..37822f95e10 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,19 +76,14 @@ include (cmake/find/ccache.cmake) # Another way would be to use --ccache-skip option before clang++-11 to make # ccache ignore it. option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling." OFF) -if (ENABLE_CHECK_HEAVY_BUILDS) +# gcc10/gcc10/clang -fsanitize=memory is too heavy +if (ENABLE_CHECK_HEAVY_BUILDS AND NOT (SANITIZE STREQUAL "memory" OR COMPILER_GCC)) # set DATA (since RSS does not work since 2.6.x+) to 2G set (RLIMIT_DATA 5000000000) # set VIRT (RLIMIT_AS) to 10G (DATA*10) set (RLIMIT_AS 10000000000) # set CPU time limit to 600 seconds set (RLIMIT_CPU 600) - # gcc10/gcc10/clang -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) - set (RLIMIT_AS 20000000000) - set (RLIMIT_DATA 20000000000) - set (RLIMIT_CPU 1200) - endif() set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () From 9ec0a9dd4c5625b577af7951b0f7e741a00fbf6b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 2 Apr 2021 14:06:36 +0300 Subject: [PATCH 040/152] more debug --- docker/packager/deb/build.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index 6450e21d289..4fbb3628b73 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -2,8 +2,13 @@ set -x -e +# Uncomment to debug ccache. +export CCACHE_LOGFILE=/build/ccache.log +export CCACHE_DEBUG=1 + ccache --show-stats ||: ccache --zero-stats ||: + read -ra ALIEN_PKGS <<< "${ALIEN_PKGS:-}" build/release --no-pbuilder "${ALIEN_PKGS[@]}" | ts '%Y-%m-%d %H:%M:%S' mv /*.deb /output @@ -24,3 +29,16 @@ then fi ccache --show-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: + +if [ "${CCACHE_DEBUG:-}" == "1" ] +then + find /build -name '*.ccache-*' -print0 \ + | tar -c -I pixz -f /output/ccache-debug.txz --null -T - +fi + +if [ -n "$CCACHE_LOGFILE" ] +then + # Compress the log as well, or else the CI will try to compress all log + # files in place, and will fail because this directory is not writable. 
+ tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" +fi From 14634689e5ac681cc593be3c34f1574176441404 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 14:46:42 +0300 Subject: [PATCH 041/152] Idea code --- .../MergeTree/MergeTreeBlockOutputStream.cpp | 11 ++ .../MergeTree/MergeTreeDeduplicationLog.cpp | 177 ++++++++++++++++-- .../MergeTree/MergeTreeDeduplicationLog.h | 127 ++++++++++--- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/StorageMergeTree.cpp | 13 ++ src/Storages/StorageMergeTree.h | 6 + src/Storages/ya.make | 1 + 7 files changed, 300 insertions(+), 36 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index bb5644567ae..1d7faaec111 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -35,6 +35,17 @@ void MergeTreeBlockOutputStream::write(const Block & block) if (!part) continue; + if (auto deduplication_log = storage.getDeduplicationLog()) + { + String block_id = part->getZeroLevelPartBlockID(); + auto res = deduplication_log.addPart(block_id, part); + if (!res.second) + { + LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + continue; + } + } + storage.renameTempPartAndAdd(part, &storage.increment); PartLog::addNewPart(storage.global_context, part, watch.elapsed()); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index f4c2a44076e..6fc9f958214 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -4,14 +4,48 @@ #include #include - - namespace DB { namespace { +enum class MergeTreeDeduplicationOp : uint8_t +{ + ADD = 1, + DROP = 2, +}; + +struct MergeTreeDeduplicationLogRecord +{ + MergeTreeDeduplicationOp operation; + std::string part_name; + std::string block_id; +}; + +void writeRecord(const MergeTreeDeduplicationLogRecord & record, WriteBuffer & out) +{ + writeIntText(static_cast(record.operation), out); + writeChar(' ', out); + writeStringBinary(record.part_name); + writeChar(' ', out); + writeStringBinary(record.block_id); + writeChar('\n', out); +} + +void readRecord(MergeTreeDeduplicationLogRecord & record, ReadBuffer & in) +{ + uint8_t op; + readIntText(op, in); + record.operation = static_cast(op); + assertChar(' ', in); + readStringBinary(record.part_name, in); + assertChar(' ', in); + readStringBinary(record.block_id, in); + assertChar('\n', in); +} + + std::string getLogPath(const std::string & prefix, size_t number) { std::filesystem::path path(prefix); @@ -34,10 +68,12 @@ size_t getLogNumber(const std::string & path_str) MergeTreeDeduplicationLog::MergeTreeDeduplicationLog( const std::string & logs_dir_, size_t deduplication_window_, - size_t rotate_interval_) + const MergeTreeDataFormatVersion & format_version_) : logs_dir(logs_dir_) , deduplication_window(deduplication_window_) - , rotate_interval(rotate_interval_) + , rotate_interval(deduplication_window_ * 2) /// actually it doesn't matter + , format_version(format_version_) + , deduplication_map(deduplication_window) {} void MergeTreeDeduplicationLog::load() @@ -50,31 +86,144 @@ void MergeTreeDeduplicationLog::load() { auto path = p.path(); auto log_number = getLogNumber(path); - existing_logs[log_description.log_number] = {path, 0}; + existing_logs[log_number] = {path, 0}; } + + /// Order important + 
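    /// (Why the order matters: existing_logs is keyed by log number and is
    /// iterated oldest-first here, so a later ADD or DROP record for a given
    /// block_id overrides whatever an earlier log said about it, and after
    /// the loop current_log_number points at the newest log on disk.)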
for (auto & [log_number, desc] : existing_logs) + { + desc.entries_count = loadSingleLog(desc.path); + current_log_number = log_number; + } + + rotateAndDropIfNeeded(); } -std::unordered_set MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) +size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) { ReadBufferFromFile read_buf(path); - std::unordered_set result; + size_t total_entries = 0; while (!read_buf.eof()) { - readIntBinary(record_checksum, read_buf); + MergeTreeDeduplicationLogRecord record; + readRecord(record, read_buf); + if (record.operation == MergeTreeDeduplicationOp::DROP) + deduplication_map.erase(record.block_id); + else + deduplication_map.insert(record.block_id, MergeTreePartInfo::fromPartName(record.part_name, format_version)); + total_entries++; } - return result; + return total_entries; } void MergeTreeDeduplicationLog::rotate() { - size_t new_log_number = log_counter++; - auto new_description = getLogDescription(logs_dir, new_log_number, rotate_interval); - existing_logs.emplace(new_log_number, new_description); - current_writer->sync(); + current_log_number++; + auto new_path = getLogPath(logs_dir, current_log_number); + MergeTreeDeduplicationLogNameDescription log_description{new_path, 0}; + existing_logs.emplace(current_log_number, log_description); - current_writer = std::make_unique(description.path, WriteMode::Append, description.from_log_index); + if (current_writer) + current_writer->sync(); + current_writer = std::make_unique(log_description.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); +} + +void MergeTreeDeduplicationLog::dropOutdatedLogs() +{ + size_t current_sum = 0; + for (auto itr = existing_logs.rbegin(); itr != existing_logs.rend();) + { + auto & description = itr->second; + if (current_sum > deduplication_window) + { + std::filesystem::remove(description.path); + itr = existing_logs.erase(itr); + } + else + { + current_sum += description.entries_count; + } + } +} + +void MergeTreeDeduplicationLog::rotateAndDropIfNeeded() +{ + if (existing_logs.empty() || existing_logs[current_log_number].entries_count > rotate_interval) + { + rotate(); + dropOutdatedLogs(); + } +} + +std::pair MergeTreeDeduplicationLog::addPart(const std::string & block_id, const MergeTreeData::MutableDataPartPtr & part) +{ + std::lock_guard lock(state_mutex); + + if (deduplication_map.contains(block_id)) + return std::make_pair(deduplication_map->get(block_id), false); + + assert(current_writer != nullptr); + MergeTreeDeduplicationLogRecord record; + record.operation = MergeTreeDeduplicationOp::ADD; + record.part_name = part->name; + record.block_id = block_id; + writeRecord(record, *current_writer); + existing_logs[current_log_number].entries_count++; + + deduplication_map.insert(record.block_id, part->info); + + return std::make_pair(part->info, true); +} + +std::pair MergeTreeDeduplicationLog::addPart(const MergeTreeData::MutableDataPartPtr & part) +{ + return addPart(part->getZeroLevelPartBlockID(), part); +} + +void MergeTreeDeduplicationLog::dropPart(const MergeTreeData::DataPartPtr & part) +{ + std::lock_guard lock(state_mutex); + + assert(current_writer != nullptr); + + for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) + { + const auto & part_info = itr->second; + if (part.contains(part_info)) + { + record.operation = MergeTreeDeduplicationOp::DROP; + record.part_name = part_info.getPartName(); + record.block_id = itr->first; + writeRecord(record, *current_writer); + 
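+            /// The DROP record is on disk; bump the per-log entry counter and
+            /// forget the block so the same data can be inserted again.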
existing_logs[current_log_number].entries_count++; + deduplication_map.erase(itr->first); + rotateAndDropIfNeeded(); + } + } +} + +void MergeTreeDeduplicationLog::dropPartition(const std::string & partition_id) +{ + std::lock_guard lock(state_mutex); + assert(current_writer != nullptr); + + for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) + { + const auto & part_info = itr->second; + if (part_info.partition_id == partition_id) + { + record.operation = MergeTreeDeduplicationOp::DROP; + record.part_name = part_info.getPartName(); + record.block_id = itr->first; + writeRecord(record, *current_writer); + deduplication_map.erase(itr->first); + existing_logs[current_log_number].entries_count++; + + rotateAndDropIfNeeded(); + } + } } } diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index 140e1c80be7..9ff7763a7fe 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -2,56 +2,139 @@ #include #include #include +#include namespace DB { -enum class MergeTreeDeduplicationOp : uint8_t -{ - ADD = 1, - DROP = 2, -}; - -struct MergeTreeDeduplicationLogRecord -{ - MergeTreeDeduplicationOp operation; - std::string part_name; - std::string block_id; -}; - struct MergeTreeDeduplicationLogNameDescription { std::string path; size_t entries_count; }; + +template +class LimitedOrderedHashMap +{ +private: + struct ListNode + { + std::string key; + V value; + }; + using Queue = std::list; + using IndexMap = std::unordered_map; + + Queue queue; + IndexMap map; + const size_t max_size; +public: + using iterator = typename List::iterator; + using const_iterator = typename List::const_iterator; + using reverse_iterator = typename List::reverse_iterator; + using const_reverse_iterator = typename List::const_reverse_iterator; + + explicit LimitedOrderedHashMap(size_t max_size_) + : max_size(max_size_) + {} + + bool contains(const std::string & key, const V & value) const + { + return map.find(key) != map.end(); + } + + V get(const std::string & key) const + { + return map.at(key)->value; + } + + size_t size() const + { + return queue.size(); + } + + bool erase(const std::string & key) + { + auto it = map.find(key); + if (it == map.end()) + return false; + + auto queue_itr = it->second; + map.erase(it); + queue.erase(queue_itr); + + return true; + } + + + bool insert(const std::string & key, const V & value) + { + auto it = map.find(key); + if (it != map.end()) + return false; + + if (size() == max_size) + { + map.erase(queue.front().key); + queue.pop_front(); + } + + ListNode elem{key, value}; + auto itr = queue.insert(queue.end(), elem); + map.emplace(itr->key, itr); + return true; + } + + void clear() + { + map.clear(); + queue.clear(); + } + + iterator begin() { return queue.begin(); } + const_iterator begin() const { return queue.cbegin(); } + iterator end() { return queue.end(); } + const_iterator end() const { return queue.cend(); } + + reverse_iterator rbegin() { return queue.rbegin(); } + const_reverse_iterator rbegin() const { return queue.crbegin(); } + reverse_iterator rend() { return queue.rend(); } + const_reverse_iterator rend() const { return queue.crend(); } +}; + class MergeTreeDeduplicationLog { public: MergeTreeDeduplicationLog( const std::string & logs_dir_, size_t deduplication_window_, - size_t rotate_interval_); + const MergeTreeDataFormatVersion & format_version_); - bool addPart(const MergeTreeData::MutableDataPartPtr & part); 
- void dropPart(const MergeTreeData::MutableDataPartPtr & part); + std::pair addPart(const std::string block_id, const MergeTreeData::MutableDataPartPtr & part); + std::pair addPart(const MergeTreeData::MutableDataPartPtr & part); + void dropPart(const MergeTreeData::DataPartPtr & part); void dropPartition(const std::string & partition_id); void load(); + private: const std::string logs_dir; const size_t deduplication_window; const size_t rotate_interval; - size_t log_counter = 1; + const MergeTreeDataFormatVersion format_version; + + size_t current_log_number = 0; std::map existing_logs; - - std::unordered_set deduplication_set; - + LimitedOrderedHashMap deduplication_map; std::unique_ptr current_writer; - size_t entries_written_in_current_file; + + std::mutex state_mutex; void rotate(); - std::unordered_set loadSingleLog(const std::string & path) const; + void dropOutdatedLogs(); + void rotateAndDropIfNeeded(); + size_t loadSingleLog(const std::string & path) const; }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 7a1ef8aeed6..06d909eb912 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -54,6 +54,7 @@ struct Settings; M(UInt64, write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Amount of bytes, accumulated in WAL to do fsync.", 0) \ M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \ + M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ \ /** Inserts settings. 
*/ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 10790057ac9..f8491a6b8e9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -93,6 +93,13 @@ StorageMergeTree::StorageMergeTree( increment.set(getMaxBlockNumber()); loadMutations(); + + if (storage_settings->non_replicated_deduplication_window != 0) + { + std::string path = getDataPaths()[0] + "/deduplication_logs"; + deduplication_log.emplace(path, storage_settings->non_replicated_deduplication_window, format_version); + deduplication_log->load(); + } } @@ -1209,6 +1216,12 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool } } + if (deduplication_log) + { + for (const auto & part : parts_to_remove) + deduplication_log->dropPart(part); + } + if (detach) LOG_INFO(log, "Detached {} parts.", parts_to_remove.size()); else diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 246ce151a02..993a798d049 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include #include @@ -93,6 +95,8 @@ public: CheckResults checkData(const ASTPtr & query, const Context & context) override; std::optional getDataProcessingJob() override; + + std::optional getDeduplicationLog() const { return deduplication_log; } private: /// Mutex and condvar for synchronous mutations wait @@ -105,6 +109,8 @@ private: BackgroundJobsExecutor background_executor; BackgroundMovesExecutor background_moves_executor; + std::optional deduplication_log; + /// For block numbers. 
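     /// (monotonically increasing; used to assign block numbers to new parts)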
SimpleIncrement increment; diff --git a/src/Storages/ya.make b/src/Storages/ya.make index e3e1807c566..2afdbe8c749 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -57,6 +57,7 @@ SRCS( MergeTree/MergeTreeDataPartWriterWide.cpp MergeTree/MergeTreeDataSelectExecutor.cpp MergeTree/MergeTreeDataWriter.cpp + MergeTree/MergeTreeDeduplicationLog.cpp MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp MergeTree/MergeTreeIndexBloomFilter.cpp MergeTree/MergeTreeIndexConditionBloomFilter.cpp From 2ab87d61ebf14b6bf13296ea88858b6f3ecfe26e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 15:37:42 +0300 Subject: [PATCH 042/152] Some build fixes --- .../MergeTree/MergeTreeBlockOutputStream.cpp | 4 +-- .../MergeTree/MergeTreeDeduplicationLog.cpp | 25 +++++++++++++------ .../MergeTree/MergeTreeDeduplicationLog.h | 14 +++++------ src/Storages/StorageMergeTree.cpp | 5 ++-- src/Storages/StorageMergeTree.h | 2 +- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 1d7faaec111..c887f6731af 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -35,10 +35,10 @@ void MergeTreeBlockOutputStream::write(const Block & block) if (!part) continue; - if (auto deduplication_log = storage.getDeduplicationLog()) + if (auto & deduplication_log = storage.getDeduplicationLog()) { String block_id = part->getZeroLevelPartBlockID(); - auto res = deduplication_log.addPart(block_id, part); + auto res = deduplication_log->addPart(block_id, part); if (!res.second) { LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 6fc9f958214..98f818f6ff7 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { @@ -27,9 +29,9 @@ void writeRecord(const MergeTreeDeduplicationLogRecord & record, WriteBuffer & o { writeIntText(static_cast(record.operation), out); writeChar(' ', out); - writeStringBinary(record.part_name); + writeStringBinary(record.part_name, out); writeChar(' ', out); - writeStringBinary(record.block_id); + writeStringBinary(record.block_id, out); writeChar('\n', out); } @@ -133,19 +135,28 @@ void MergeTreeDeduplicationLog::rotate() void MergeTreeDeduplicationLog::dropOutdatedLogs() { size_t current_sum = 0; - for (auto itr = existing_logs.rbegin(); itr != existing_logs.rend();) + size_t remove_from_value = 0; + for (auto itr = existing_logs.rbegin(); itr != existing_logs.rend(); ++itr) { auto & description = itr->second; if (current_sum > deduplication_window) { - std::filesystem::remove(description.path); - itr = existing_logs.erase(itr); + //std::filesystem::remove(description.path); + //itr = existing_logs.erase(itr); + remove_from_value = itr->first; + break; } - else + current_sum += description.entries_count; + ++itr; + } + + if (remove_from_value != 0) + { + for (auto itr = existing_logs.begin(); itr != existing_logs.end();) { - current_sum += description.entries_count; } } + } void MergeTreeDeduplicationLog::rotateAndDropIfNeeded() diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index 
9ff7763a7fe..d2d67ae6a56 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -24,22 +24,22 @@ private: V value; }; using Queue = std::list; - using IndexMap = std::unordered_map; + using IndexMap = std::unordered_map; Queue queue; IndexMap map; const size_t max_size; public: - using iterator = typename List::iterator; - using const_iterator = typename List::const_iterator; - using reverse_iterator = typename List::reverse_iterator; - using const_reverse_iterator = typename List::const_reverse_iterator; + using iterator = typename Queue::iterator; + using const_iterator = typename Queue::const_iterator; + using reverse_iterator = typename Queue::reverse_iterator; + using const_reverse_iterator = typename Queue::const_reverse_iterator; explicit LimitedOrderedHashMap(size_t max_size_) : max_size(max_size_) {} - bool contains(const std::string & key, const V & value) const + bool contains(const std::string & key) const { return map.find(key) != map.end(); } @@ -134,7 +134,7 @@ private: void rotate(); void dropOutdatedLogs(); void rotateAndDropIfNeeded(); - size_t loadSingleLog(const std::string & path) const; + size_t loadSingleLog(const std::string & path); }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f8491a6b8e9..5a20c97807b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -94,10 +94,11 @@ StorageMergeTree::StorageMergeTree( loadMutations(); - if (storage_settings->non_replicated_deduplication_window != 0) + auto settings = getSettings(); + if (settings->non_replicated_deduplication_window != 0) { std::string path = getDataPaths()[0] + "/deduplication_logs"; - deduplication_log.emplace(path, storage_settings->non_replicated_deduplication_window, format_version); + deduplication_log.emplace(path, settings->non_replicated_deduplication_window, format_version); deduplication_log->load(); } } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 993a798d049..061e6122897 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -96,7 +96,7 @@ public: std::optional getDataProcessingJob() override; - std::optional getDeduplicationLog() const { return deduplication_log; } + std::optional & getDeduplicationLog() { return deduplication_log; } private: /// Mutex and condvar for synchronous mutations wait From 6f36661575c92b8036fa587389ddadb3ca30fda6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 16:34:36 +0300 Subject: [PATCH 043/152] Buildable code --- .../MergeTree/MergeTreeDeduplicationLog.cpp | 26 +++++++++++-------- .../MergeTree/MergeTreeDeduplicationLog.h | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 98f818f6ff7..5ab570d8a68 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -141,19 +141,21 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs() auto & description = itr->second; if (current_sum > deduplication_window) { - //std::filesystem::remove(description.path); - //itr = existing_logs.erase(itr); remove_from_value = itr->first; break; } current_sum += description.entries_count; - ++itr; } if (remove_from_value != 0) { for (auto itr = existing_logs.begin(); itr != existing_logs.end();) { + size_t number = itr->first; + 
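+            /// Delete the file first, then the map entry; once the last outdated
+            /// log (remove_from_value) is gone, everything newer is kept.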
std::filesystem::remove(itr->second.path); + itr = existing_logs.erase(itr); + if (remove_from_value == number) + break; } } @@ -173,7 +175,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: std::lock_guard lock(state_mutex); if (deduplication_map.contains(block_id)) - return std::make_pair(deduplication_map->get(block_id), false); + return std::make_pair(deduplication_map.get(block_id), false); assert(current_writer != nullptr); MergeTreeDeduplicationLogRecord record; @@ -201,15 +203,16 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreeData::DataPartPtr & part for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) { - const auto & part_info = itr->second; - if (part.contains(part_info)) + const auto & part_info = itr->value; + if (part->info.contains(part_info)) { + MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; record.part_name = part_info.getPartName(); - record.block_id = itr->first; + record.block_id = itr->key; writeRecord(record, *current_writer); existing_logs[current_log_number].entries_count++; - deduplication_map.erase(itr->first); + deduplication_map.erase(itr->key); rotateAndDropIfNeeded(); } } @@ -222,14 +225,15 @@ void MergeTreeDeduplicationLog::dropPartition(const std::string & partition_id) for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) { - const auto & part_info = itr->second; + const auto & part_info = itr->value; if (part_info.partition_id == partition_id) { + MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; record.part_name = part_info.getPartName(); - record.block_id = itr->first; + record.block_id = itr->key; writeRecord(record, *current_writer); - deduplication_map.erase(itr->first); + deduplication_map.erase(itr->key); existing_logs[current_log_number].entries_count++; rotateAndDropIfNeeded(); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index d2d67ae6a56..c7d4d6d3098 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -111,7 +111,7 @@ public: size_t deduplication_window_, const MergeTreeDataFormatVersion & format_version_); - std::pair addPart(const std::string block_id, const MergeTreeData::MutableDataPartPtr & part); + std::pair addPart(const std::string & block_id, const MergeTreeData::MutableDataPartPtr & part); std::pair addPart(const MergeTreeData::MutableDataPartPtr & part); void dropPart(const MergeTreeData::DataPartPtr & part); void dropPartition(const std::string & partition_id); From e456e4023292dc6491510eb3346be677776516a7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 2 Apr 2021 15:09:52 +0300 Subject: [PATCH 044/152] show ccache config --- docker/packager/binary/build.sh | 7 +++++-- docker/packager/deb/build.sh | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2498700354d..d47f41d3f1a 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -18,17 +18,20 @@ export CCACHE_DEBUG=1 mkdir -p build/build_docker cd build/build_docker -ccache --show-stats ||: -ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt # Read cmake arguments into array (possibly empty) read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" cmake --debug-trycompile --verbose=1 
-DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..

+ccache --show-config ||:
+ccache --show-stats ||:
+ccache --zero-stats ||:
+
 # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
 ninja $NINJA_FLAGS --verbose clickhouse-bundle

+ccache --show-config ||:
 ccache --show-stats ||:

 mv ./programs/clickhouse* /output
diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh
index 4fbb3628b73..6c89b6fec31 100755
--- a/docker/packager/deb/build.sh
+++ b/docker/packager/deb/build.sh
@@ -6,6 +6,7 @@ set -x -e
 export CCACHE_LOGFILE=/build/ccache.log
 export CCACHE_DEBUG=1

+ccache --show-config ||:
 ccache --show-stats ||:
 ccache --zero-stats ||:

@@ -27,9 +28,12 @@ then
         mv /build/obj-*/src/unit_tests_dbms /output/binary
     fi
 fi
-ccache --show-stats ||:

 ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:

+ccache --show-config ||:
+ccache --show-stats ||:
+
 if [ "${CCACHE_DEBUG:-}" == "1" ]
 then
     find /build -name '*.ccache-*' -print0 \

From d5a7809873db00c2d8f3a03fb3f6baa6e1fd8294 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 2 Apr 2021 16:35:42 +0300
Subject: [PATCH 045/152] add pixz

---
 docker/packager/deb/Dockerfile | 35 +++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile
index 8fd89d60f85..902929a2644 100644
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@@ -34,31 +34,32 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
 # Libraries from OS are only needed to test the "unbundled" build (this is not used in production).
 RUN apt-get update \
     && apt-get install \
-            gcc-9 \
-            g++-9 \
-            clang-11 \
-            clang-tidy-11 \
-            lld-11 \
-            llvm-11 \
-            llvm-11-dev \
+            alien \
             clang-10 \
+            clang-11 \
             clang-tidy-10 \
+            clang-tidy-11 \
+            cmake \
+            debhelper \
+            devscripts \
+            g++-9 \
+            gcc-9 \
+            gdb \
+            git \
+            gperf \
             lld-10 \
+            lld-11 \
             llvm-10 \
             llvm-10-dev \
+            llvm-11 \
+            llvm-11-dev \
+            moreutils \
             ninja-build \
             perl \
-            pkg-config \
-            devscripts \
-            debhelper \
-            git \
-            tzdata \
-            gperf \
-            alien \
-            cmake \
-            gdb \
-            moreutils \
             pigz \
+            pixz \
+            pkg-config \
+            tzdata \
             --yes --no-install-recommends

 # NOTE: For some reason we have outdated version of gcc-10 in ubuntu 20.04 stable.

From 04214609439d11f2f22bd0ceeae0eca48c0b906e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 2 Apr 2021 17:06:53 +0300
Subject: [PATCH 046/152] Always call event.set for zk client callbacks.
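
Each *Impl helper in ZooKeeper.cpp used to fire its event only on the last
line of the callback:

    auto callback = [&](const Coordination::GetResponse & response)
    {
        code = response.error;
        ...
        event.set();  /// never reached if anything above throws
    };

so an exception thrown earlier in the callback left the thread waiting on
that event stuck. Moving the set() into SCOPE_EXIT at the top of the
callback fires the event on every exit path, including exceptional ones.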
--- src/Common/ZooKeeper/ZooKeeper.cpp | 14 +++++++------- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 13 +++++++++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 330985e1599..9f59da233fc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -243,6 +243,7 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings auto callback = [&](const Coordination::ListResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; if (code == Coordination::Error::ZOK) { @@ -250,7 +251,6 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings if (stat) *stat = response.stat; } - event.set(); }; impl->list(path, callback, watch_callback); @@ -303,10 +303,10 @@ Coordination::Error ZooKeeper::createImpl(const std::string & path, const std::s auto callback = [&](const Coordination::CreateResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; if (code == Coordination::Error::ZOK) path_created = response.path_created; - event.set(); }; impl->create(path, data, mode & 1, mode & 2, {}, callback); /// TODO better mode @@ -371,9 +371,9 @@ Coordination::Error ZooKeeper::removeImpl(const std::string & path, int32_t vers auto callback = [&](const Coordination::RemoveResponse & response) { + SCOPE_EXIT(event.set()); if (response.error != Coordination::Error::ZOK) code = response.error; - event.set(); }; impl->remove(path, version, callback); @@ -404,10 +404,10 @@ Coordination::Error ZooKeeper::existsImpl(const std::string & path, Coordination auto callback = [&](const Coordination::ExistsResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; if (code == Coordination::Error::ZOK && stat) *stat = response.stat; - event.set(); }; impl->exists(path, callback, watch_callback); @@ -436,6 +436,7 @@ Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & r auto callback = [&](const Coordination::GetResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; if (code == Coordination::Error::ZOK) { @@ -443,7 +444,6 @@ Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & r if (stat) *stat = response.stat; } - event.set(); }; impl->get(path, callback, watch_callback); @@ -508,10 +508,10 @@ Coordination::Error ZooKeeper::setImpl(const std::string & path, const std::stri auto callback = [&](const Coordination::SetResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; if (code == Coordination::Error::ZOK && stat) *stat = response.stat; - event.set(); }; impl->set(path, data, version, callback); @@ -558,9 +558,9 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests auto callback = [&](const Coordination::MultiResponse & response) { + SCOPE_EXIT(event.set()); code = response.error; responses = response.responses; - event.set(); }; impl->multi(requests, callback); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2314139af69..93ecaef8365 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -796,8 +796,17 @@ void ZooKeeper::receiveEvent() /// In case we cannot read the response, we should indicate it as the error of that type /// when the user cannot assume whether the request was processed or not. 
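        /// (connection loss is ZooKeeper's conventional "outcome unknown" error)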
response->error = Error::ZCONNECTIONLOSS; - if (request_info.callback) - request_info.callback(*response); + + try + { + if (request_info.callback) + request_info.callback(*response); + } + catch (...) + { + /// Throw initial exception, not exception from callback. + tryLogCurrentException(__PRETTY_FUNCTION__); + } throw; } From 759dd79c769bff593283c2fb0ea1dc6478351c65 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 19:45:18 +0300 Subject: [PATCH 047/152] Add tests --- .../MergeTree/MergeTreeBlockOutputStream.cpp | 20 +--- src/Storages/MergeTree/MergeTreeData.cpp | 21 +++- src/Storages/MergeTree/MergeTreeData.h | 7 +- .../MergeTree/MergeTreeDeduplicationLog.cpp | 76 ++++++------- .../MergeTree/MergeTreeDeduplicationLog.h | 15 ++- src/Storages/StorageMergeTree.cpp | 4 +- src/Storages/StorageMergeTree.h | 4 +- .../01781_merge_tree_deduplication.reference | 50 +++++++++ .../01781_merge_tree_deduplication.sql | 105 ++++++++++++++++++ 9 files changed, 225 insertions(+), 77 deletions(-) create mode 100644 tests/queries/0_stateless/01781_merge_tree_deduplication.reference create mode 100644 tests/queries/0_stateless/01781_merge_tree_deduplication.sql diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index c887f6731af..a8375b43c3a 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -35,23 +35,13 @@ void MergeTreeBlockOutputStream::write(const Block & block) if (!part) continue; - if (auto & deduplication_log = storage.getDeduplicationLog()) + if (storage.renameTempPartAndAdd(part, &storage.increment, nullptr, storage.getDeduplicationLog())) { - String block_id = part->getZeroLevelPartBlockID(); - auto res = deduplication_log->addPart(block_id, part); - if (!res.second) - { - LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); - continue; - } + PartLog::addNewPart(storage.global_context, part, watch.elapsed()); + + /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. + storage.background_executor.triggerTask(); } - - storage.renameTempPartAndAdd(part, &storage.increment); - - PartLog::addNewPart(storage.global_context, part, watch.elapsed()); - - /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. - storage.background_executor.triggerTask(); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3cd4187af0a..3febe4d63d9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2022,7 +2022,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( } -bool MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) +bool MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log) { if (out_transaction && &out_transaction->data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. 
It is a bug.", @@ -2031,7 +2031,7 @@ bool MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrem DataPartsVector covered_parts; { auto lock = lockParts(); - if (!renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts)) + if (!renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts, deduplication_log)) return false; } if (!covered_parts.empty()) @@ -2044,7 +2044,7 @@ bool MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrem bool MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, - std::unique_lock & lock, DataPartsVector * out_covered_parts) + std::unique_lock & lock, DataPartsVector * out_covered_parts, MergeTreeDeduplicationLog * deduplication_log) { if (out_transaction && &out_transaction->data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", @@ -2099,6 +2099,17 @@ bool MergeTreeData::renameTempPartAndReplace( return false; } + if (deduplication_log) + { + String block_id = part->getZeroLevelPartBlockID(); + auto res = deduplication_log->addPart(block_id, part_info); + if (!res.second) + { + LOG_INFO(log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + return false; + } + } + /// All checks are passed. Now we can rename the part on disk. /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts /// @@ -2155,7 +2166,7 @@ bool MergeTreeData::renameTempPartAndReplace( } MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( - MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) + MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log) { if (out_transaction && &out_transaction->data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", @@ -2164,7 +2175,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( DataPartsVector covered_parts; { auto lock = lockParts(); - renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts); + renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts, deduplication_log); } return covered_parts; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2b6da96fede..9be0a0b6caa 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -54,6 +54,7 @@ struct CurrentlySubmergingEmergingTagger; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; using ManyExpressionActions = std::vector; +class MergeTreeDeduplicationLog; namespace ErrorCodes { @@ -447,18 +448,18 @@ public: /// active set later with out_transaction->commit()). /// Else, commits the part immediately. /// Returns true if part was added. Returns false if part is covered by bigger part. - bool renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr); + bool renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. 
/// Returns all parts covered by the added part (in ascending order). /// If out_transaction == nullptr, marks covered parts as Outdated. DataPartsVector renameTempPartAndReplace( - MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr); + MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); /// Low-level version of previous one, doesn't lock mutex bool renameTempPartAndReplace( MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, DataPartsLock & lock, - DataPartsVector * out_covered_parts = nullptr); + DataPartsVector * out_covered_parts = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); /// Remove parts from working set immediately (without wait for background diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 5ab570d8a68..856aaffa401 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,11 +29,12 @@ struct MergeTreeDeduplicationLogRecord void writeRecord(const MergeTreeDeduplicationLogRecord & record, WriteBuffer & out) { writeIntText(static_cast(record.operation), out); - writeChar(' ', out); - writeStringBinary(record.part_name, out); - writeChar(' ', out); - writeStringBinary(record.block_id, out); + writeChar('\t', out); + writeString(record.part_name, out); + writeChar('\t', out); + writeString(record.block_id, out); writeChar('\n', out); + out.next(); } void readRecord(MergeTreeDeduplicationLogRecord & record, ReadBuffer & in) @@ -40,10 +42,10 @@ void readRecord(MergeTreeDeduplicationLogRecord & record, ReadBuffer & in) uint8_t op; readIntText(op, in); record.operation = static_cast(op); - assertChar(' ', in); - readStringBinary(record.part_name, in); - assertChar(' ', in); - readStringBinary(record.block_id, in); + assertChar('\t', in); + readString(record.part_name, in); + assertChar('\t', in); + readString(record.block_id, in); assertChar('\n', in); } @@ -99,6 +101,8 @@ void MergeTreeDeduplicationLog::load() } rotateAndDropIfNeeded(); + if (!current_writer) + current_writer = std::make_unique(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); } size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) @@ -163,80 +167,64 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs() void MergeTreeDeduplicationLog::rotateAndDropIfNeeded() { - if (existing_logs.empty() || existing_logs[current_log_number].entries_count > rotate_interval) + if (existing_logs.empty() || existing_logs[current_log_number].entries_count >= rotate_interval) { rotate(); dropOutdatedLogs(); } + } -std::pair MergeTreeDeduplicationLog::addPart(const std::string & block_id, const MergeTreeData::MutableDataPartPtr & part) +std::pair MergeTreeDeduplicationLog::addPart(const std::string & block_id, const MergeTreePartInfo & part_info) { std::lock_guard lock(state_mutex); if (deduplication_map.contains(block_id)) - return std::make_pair(deduplication_map.get(block_id), false); + { + auto info = deduplication_map.get(block_id); + return std::make_pair(info, false); + } assert(current_writer != nullptr); + MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::ADD; - record.part_name = part->name; + 
record.part_name = part_info.getPartName(); record.block_id = block_id; writeRecord(record, *current_writer); existing_logs[current_log_number].entries_count++; - deduplication_map.insert(record.block_id, part->info); + deduplication_map.insert(record.block_id, part_info); + rotateAndDropIfNeeded(); - return std::make_pair(part->info, true); + return std::make_pair(part_info, true); } -std::pair MergeTreeDeduplicationLog::addPart(const MergeTreeData::MutableDataPartPtr & part) -{ - return addPart(part->getZeroLevelPartBlockID(), part); -} - -void MergeTreeDeduplicationLog::dropPart(const MergeTreeData::DataPartPtr & part) +void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_info) { std::lock_guard lock(state_mutex); assert(current_writer != nullptr); - for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) + for (auto itr = deduplication_map.begin(); itr != deduplication_map.end();) { const auto & part_info = itr->value; - if (part->info.contains(part_info)) + if (drop_part_info.contains(part_info)) { MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; record.part_name = part_info.getPartName(); record.block_id = itr->key; writeRecord(record, *current_writer); + existing_logs[current_log_number].entries_count++; - deduplication_map.erase(itr->key); + ++itr; + deduplication_map.erase(record.block_id); rotateAndDropIfNeeded(); } - } -} - -void MergeTreeDeduplicationLog::dropPartition(const std::string & partition_id) -{ - std::lock_guard lock(state_mutex); - assert(current_writer != nullptr); - - for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); ++itr) - { - const auto & part_info = itr->value; - if (part_info.partition_id == partition_id) + else { - MergeTreeDeduplicationLogRecord record; - record.operation = MergeTreeDeduplicationOp::DROP; - record.part_name = part_info.getPartName(); - record.block_id = itr->key; - writeRecord(record, *current_writer); - deduplication_map.erase(itr->key); - existing_logs[current_log_number].entries_count++; - - rotateAndDropIfNeeded(); + ++itr; } } } diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index c7d4d6d3098..66b3360dc4c 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -1,8 +1,13 @@ #pragma once #include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include namespace DB { @@ -111,10 +116,8 @@ public: size_t deduplication_window_, const MergeTreeDataFormatVersion & format_version_); - std::pair addPart(const std::string & block_id, const MergeTreeData::MutableDataPartPtr & part); - std::pair addPart(const MergeTreeData::MutableDataPartPtr & part); - void dropPart(const MergeTreeData::DataPartPtr & part); - void dropPartition(const std::string & partition_id); + std::pair addPart(const std::string & block_id, const MergeTreePartInfo & part); + void dropPart(const MergeTreePartInfo & part); void load(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5a20c97807b..5e192f76bc9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -98,7 +98,7 @@ StorageMergeTree::StorageMergeTree( if (settings->non_replicated_deduplication_window != 0) { std::string path = getDataPaths()[0] + "/deduplication_logs"; - deduplication_log.emplace(path, settings->non_replicated_deduplication_window, 
format_version); + deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version); deduplication_log->load(); } } @@ -1220,7 +1220,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool if (deduplication_log) { for (const auto & part : parts_to_remove) - deduplication_log->dropPart(part); + deduplication_log->dropPart(part->info); } if (detach) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 061e6122897..3ac7b8a0270 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -96,7 +96,7 @@ public: std::optional getDataProcessingJob() override; - std::optional & getDeduplicationLog() { return deduplication_log; } + MergeTreeDeduplicationLog * getDeduplicationLog() { return deduplication_log.get(); } private: /// Mutex and condvar for synchronous mutations wait @@ -109,7 +109,7 @@ private: BackgroundJobsExecutor background_executor; BackgroundMovesExecutor background_moves_executor; - std::optional deduplication_log; + std::unique_ptr deduplication_log; /// For block numbers. SimpleIncrement increment; diff --git a/tests/queries/0_stateless/01781_merge_tree_deduplication.reference b/tests/queries/0_stateless/01781_merge_tree_deduplication.reference new file mode 100644 index 00000000000..402a8919da5 --- /dev/null +++ b/tests/queries/0_stateless/01781_merge_tree_deduplication.reference @@ -0,0 +1,50 @@ +1 1 +1 1 +=============== +1 1 +1 1 +2 2 +3 3 +4 4 +=============== +1 1 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +=============== +1 1 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +=============== +10 10 +12 12 +=============== +1 1 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +8 8 +9 9 +11 11 +12 12 +=============== +88 11 11 +77 11 11 +77 12 12 diff --git a/tests/queries/0_stateless/01781_merge_tree_deduplication.sql b/tests/queries/0_stateless/01781_merge_tree_deduplication.sql new file mode 100644 index 00000000000..7e4b6f7db2b --- /dev/null +++ b/tests/queries/0_stateless/01781_merge_tree_deduplication.sql @@ -0,0 +1,105 @@ +DROP TABLE IF EXISTS merge_tree_deduplication; + +CREATE TABLE merge_tree_deduplication +( + key UInt64, + value String, + part UInt8 DEFAULT 77 +) +ENGINE=MergeTree() +ORDER BY key +PARTITION BY part +SETTINGS non_replicated_deduplication_window=3; + +SYSTEM STOP MERGES merge_tree_deduplication; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (1, '1'); + +SELECT key, value FROM merge_tree_deduplication; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (1, '1'); + +SELECT key, value FROM merge_tree_deduplication; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (2, '2'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (3, '3'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (4, '4'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (1, '1'); + +SELECT key, value FROM merge_tree_deduplication ORDER BY key; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (5, '5'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (6, '6'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (7, '7'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (5, '5'); + +SELECT key, value FROM merge_tree_deduplication ORDER BY key; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (8, '8'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (9, 
'9'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (10, '10'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (11, '11'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); + +INSERT INTO merge_tree_deduplication (key, value) VALUES (10, '10'); +INSERT INTO merge_tree_deduplication (key, value) VALUES (11, '11'); +INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); + +SELECT key, value FROM merge_tree_deduplication ORDER BY key; + +SELECT '==============='; + +ALTER TABLE merge_tree_deduplication DROP PART '77_9_9_0'; -- some old part + +INSERT INTO merge_tree_deduplication (key, value) VALUES (10, '10'); + +SELECT key, value FROM merge_tree_deduplication WHERE key = 10; + +ALTER TABLE merge_tree_deduplication DROP PART '77_13_13_0'; -- fresh part + +INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); + +SELECT key, value FROM merge_tree_deduplication WHERE key = 12; + +DETACH TABLE merge_tree_deduplication; +ATTACH TABLE merge_tree_deduplication; + +OPTIMIZE TABLE merge_tree_deduplication FINAL; + +INSERT INTO merge_tree_deduplication (key, value) VALUES (11, '11'); -- deduplicated +INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); -- deduplicated + +SELECT '==============='; + +SELECT key, value FROM merge_tree_deduplication ORDER BY key; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (11, '11', 88); + +ALTER TABLE merge_tree_deduplication DROP PARTITION 77; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (11, '11', 88); --deduplicated + +INSERT INTO merge_tree_deduplication (key, value) VALUES (11, '11'); -- not deduplicated +INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); -- not deduplicated + +SELECT part, key, value FROM merge_tree_deduplication ORDER BY key; + +DROP TABLE IF EXISTS merge_tree_deduplication; From a555d078a231bec875b5ceee6d048bd2f3f4d703 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 19:56:02 +0300 Subject: [PATCH 048/152] Add exception handling --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 -- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 11 +++++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d59d877b372..5805c1d66f7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1382,6 +1382,4 @@ bool isInMemoryPart(const MergeTreeDataPartPtr & data_part) return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY); } - } - diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 856aaffa401..5b11d111a95 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -96,8 +96,15 @@ void MergeTreeDeduplicationLog::load() /// Order important for (auto & [log_number, desc] : existing_logs) { - desc.entries_count = loadSingleLog(desc.path); - current_log_number = log_number; + try + { + desc.entries_count = loadSingleLog(desc.path); + current_log_number = log_number; + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__, "Error while loading MergeTree deduplication log on path " + desc.path); + } } rotateAndDropIfNeeded(); From ab34873344c48edd17b6046af7d642d432400f89 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 20:46:01 +0300 Subject: [PATCH 049/152] Add some comments --- .../MergeTree/MergeTreeDeduplicationLog.cpp | 36 +++++++++++++--- .../MergeTree/MergeTreeDeduplicationLog.h | 43 +++++++++++++++++-- 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 5b11d111a95..8b87f0069a1 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -13,12 +13,14 @@ namespace DB namespace { +/// Deduplication operation part was dropped or added enum class MergeTreeDeduplicationOp : uint8_t { ADD = 1, DROP = 2, }; +/// Record for deduplication on disk struct MergeTreeDeduplicationLogRecord { MergeTreeDeduplicationOp operation; @@ -93,7 +95,7 @@ void MergeTreeDeduplicationLog::load() existing_logs[log_number] = {path, 0}; } - /// Order important + /// Order important, we load history from the begging to the end for (auto & [log_number, desc] : existing_logs) { try @@ -107,7 +109,10 @@ void MergeTreeDeduplicationLog::load() } } + /// Start new log, drop previous rotateAndDropIfNeeded(); + + /// Can happen in case we have unfinished log if (!current_writer) current_writer = std::make_unique(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); } @@ -147,19 +152,24 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs() { size_t current_sum = 0; size_t remove_from_value = 0; + /// Go from end to the beginning for (auto itr = existing_logs.rbegin(); itr != existing_logs.rend(); ++itr) { - auto & description = itr->second; if (current_sum > deduplication_window) { + /// We have more logs than required, all older files (including current) can be dropped remove_from_value = itr->first; break; } + + auto & description = itr->second; current_sum += description.entries_count; } + /// If we found some logs to drop if (remove_from_value != 0) { + /// Go from beginning to the end and drop all outdated logs for (auto itr = existing_logs.begin(); itr != existing_logs.end();) { size_t number = itr->first; @@ -174,18 +184,19 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs() void MergeTreeDeduplicationLog::rotateAndDropIfNeeded() { + /// If we don't have logs at all or already have enough records in current if (existing_logs.empty() || existing_logs[current_log_number].entries_count >= rotate_interval) { rotate(); dropOutdatedLogs(); } - } std::pair MergeTreeDeduplicationLog::addPart(const std::string & block_id, const MergeTreePartInfo & part_info) { std::lock_guard lock(state_mutex); + /// If we alredy have this block let's deduplicate it if (deduplication_map.contains(block_id)) { auto info = deduplication_map.get(block_id); @@ -194,14 +205,18 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: assert(current_writer != nullptr); + /// Create new record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::ADD; record.part_name = part_info.getPartName(); record.block_id = block_id; + /// Write it to disk writeRecord(record, *current_writer); + /// We have one more record in current log existing_logs[current_log_number].entries_count++; - + /// Add to deduplication map 
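+    /// (limited-size FIFO: when the map is full, the oldest block is evicted)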
deduplication_map.insert(record.block_id, part_info); + /// Rotate and drop old logs if needed rotateAndDropIfNeeded(); return std::make_pair(part_info, true); @@ -213,20 +228,29 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf assert(current_writer != nullptr); - for (auto itr = deduplication_map.begin(); itr != deduplication_map.end();) + for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) { const auto & part_info = itr->value; + /// Part is covered by dropped part, let's remove it from + /// deduplication history if (drop_part_info.contains(part_info)) { + /// Create drop record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; record.part_name = part_info.getPartName(); record.block_id = itr->key; + /// Write it to disk writeRecord(record, *current_writer); - + /// We have one more record on disk existing_logs[current_log_number].entries_count++; + + /// Increment itr before erase, otherwise it will invalidated ++itr; + /// Remove block_id from in-memory table deduplication_map.erase(record.block_id); + + /// Rotate and drop old logs if needed rotateAndDropIfNeeded(); } else diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index 66b3360dc4c..e1fd80f2222 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -12,13 +12,19 @@ namespace DB { +/// Description of dedupliction log struct MergeTreeDeduplicationLogNameDescription { + /// Path to log std::string path; + + /// How many entries we have in log size_t entries_count; }; - +/// Simple string-key HashTable with fixed size based on STL containers. +/// Preserves order using linked list and remove elements +/// on overflow in FIFO order. template class LimitedOrderedHashMap { @@ -72,7 +78,6 @@ public: return true; } - bool insert(const std::string & key, const V & value) { auto it = map.find(key); @@ -108,6 +113,14 @@ public: const_reverse_iterator rend() const { return queue.crend(); } }; +/// Fixed-size log for deduplication in non-replicated MergeTree. +/// Stores records on disk for zero-level parts in human-readable format: +/// operation part_name partition_id_check_sum +/// 1 88_18_18_0 88_10619499460461868496_9553701830997749308 +/// 2 77_14_14_0 77_15147918179036854170_6725063583757244937 +/// 2 77_15_15_0 77_14977227047908934259_8047656067364802772 +/// 1 77_20_20_0 77_15147918179036854170_6725063583757244937 +/// Also stores them in memory in hash table with limited size. class MergeTreeDeduplicationLog { public: @@ -116,27 +129,51 @@ public: size_t deduplication_window_, const MergeTreeDataFormatVersion & format_version_); + /// Add part into in-memory hash table and to disk + /// Return true and part info if insertion was successful. + /// Otherwise, in case of duplicate, return false and previous part name with same hash (useful for logging) std::pair addPart(const std::string & block_id, const MergeTreePartInfo & part); + + /// Remove all covered parts from in memory table and add DROP records to the disk void dropPart(const MergeTreePartInfo & part); + /// Load history from disk. Ignores broken logs. void load(); - private: const std::string logs_dir; + /// Size of deduplication window const size_t deduplication_window; + + /// How often we create new logs. 
Not very important,
+    /// default value equals deduplication_window * 2
     const size_t rotate_interval;
     const MergeTreeDataFormatVersion format_version;

+    /// Current log number. Always growing number.
     size_t current_log_number = 0;
+
+    /// All existing logs in order of their numbers
     std::map<size_t, MergeTreeDeduplicationLogNameDescription> existing_logs;
+
+    /// In memory hash-table
     LimitedOrderedHashMap<MergeTreePartInfo> deduplication_map;
+
+    /// Writer to the current log file
     std::unique_ptr<WriteBufferFromFile> current_writer;

+    /// Overall mutex because we can have a lot of concurrent inserts
     std::mutex state_mutex;

+    /// Start new log
     void rotate();
+
+    /// Remove all old logs with non-needed records for deduplication_window
     void dropOutdatedLogs();
+
+    /// Execute both previous methods if needed
     void rotateAndDropIfNeeded();
+
+    /// Load single log from disk. In case of corruption throws exceptions
     size_t loadSingleLog(const std::string & path);
 };

From b105153d2c848d786ec290915fba675af29abf08 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 2 Apr 2021 20:48:58 +0300
Subject: [PATCH 050/152] Useful comment

---
 src/Storages/MergeTree/MergeTreeData.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 3febe4d63d9..cbdedeeec86 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2099,6 +2099,10 @@ bool MergeTreeData::renameTempPartAndReplace(
         return false;
     }

+    /// Deduplication log used only from non-replicated MergeTree. Replicated
+    /// tables have their own mechanism. We try to deduplicated at such deep
+    /// level, because only here we know real part name which is required for
+    /// deduplication.
     if (deduplication_log)

From fc3afe108773d5fcf32faa42ae2c9df9de052dde Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 2 Apr 2021 20:51:57 +0300
Subject: [PATCH 051/152] Fix typos

---
 src/Storages/MergeTree/MergeTreeData.cpp             | 2 +-
 src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index cbdedeeec86..5973ed2e588 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2100,7 +2100,7 @@ bool MergeTreeData::renameTempPartAndReplace(
     }

     /// Deduplication log used only from non-replicated MergeTree. Replicated
-    /// tables have their own mechanism. We try to deduplicated at such deep
+    /// tables have their own mechanism. We try to deduplicate at such deep
     /// level, because only here we know real part name which is required for
     /// deduplication.
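     /// The block id itself is the partition id plus a hash of the inserted
     /// block (see the sample records in MergeTreeDeduplicationLog.h).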
if (deduplication_log) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 8b87f0069a1..d04283ff489 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -196,7 +196,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: { std::lock_guard lock(state_mutex); - /// If we alredy have this block let's deduplicate it + /// If we already have this block let's deduplicate it if (deduplication_map.contains(block_id)) { auto info = deduplication_map.get(block_id); From ca30c9186812aabb159db413fd6c88e83466da9f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 20:54:24 +0300 Subject: [PATCH 052/152] More comments --- src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index a8375b43c3a..6c9e14f9796 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -35,6 +35,7 @@ void MergeTreeBlockOutputStream::write(const Block & block) if (!part) continue; + /// Part can be deduplicated, so increment counters and add to part log only if it's really added if (storage.renameTempPartAndAdd(part, &storage.increment, nullptr, storage.getDeduplicationLog())) { PartLog::addNewPart(storage.global_context, part, watch.elapsed()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5973ed2e588..5d0e7c13e57 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2109,6 +2109,7 @@ bool MergeTreeData::renameTempPartAndReplace( auto res = deduplication_log->addPart(block_id, part_info); if (!res.second) { + ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); LOG_INFO(log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); return false; } From b884593186e90bfbaaab27bd2fd00e3e1804c03a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Apr 2021 20:55:05 +0300 Subject: [PATCH 053/152] Followup fix --- src/Storages/MergeTree/MergeTreeData.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d0e7c13e57..6ddfbd338c7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -71,6 +71,7 @@ namespace ProfileEvents extern const Event RejectedInserts; extern const Event DelayedInserts; extern const Event DelayedInsertsMilliseconds; + extern const Event DuplicatedInsertedBlocks; } namespace CurrentMetrics From b127e314514aa71d57ac7884d0b8e124b8939426 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 3 Apr 2021 14:56:56 +0300 Subject: [PATCH 054/152] CI fixes --- src/Storages/MergeTree/MergeTreeDeduplicationLog.h | 2 +- tests/queries/skip_list.json | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index e1fd80f2222..643b2ef9fad 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -135,7 +135,7 @@ public: std::pair addPart(const std::string & block_id, const MergeTreePartInfo & part); /// Remove all covered 
parts from in memory table and add DROP records to the disk - void dropPart(const MergeTreePartInfo & part); + void dropPart(const MergeTreePartInfo & drop_part_info); /// Load history from disk. Ignores broken logs. void load(); diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 3df639b73cf..2cee0254cd7 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -136,6 +136,7 @@ "00626_replace_partition_from_table_zookeeper", "00626_replace_partition_from_table", "00152_insert_different_granularity", + "01781_merge_tree_deduplication", /// Old syntax is not allowed "01062_alter_on_mutataion_zookeeper", "00925_zookeeper_empty_replicated_merge_tree_optimize_final", From 1906c778841f1ed1af2f2892f163aef009de4553 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 3 Apr 2021 17:57:03 +0300 Subject: [PATCH 055/152] Fix tidy one more time --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index d04283ff489..9cbffe977c4 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -90,7 +90,7 @@ void MergeTreeDeduplicationLog::load() for (const auto & p : fs::directory_iterator(logs_dir)) { - auto path = p.path(); + const auto & path = p.path(); auto log_number = getLogNumber(path); existing_logs[log_number] = {path, 0}; } From adf494ae1ff3f1887c69b141cfde6b8cf72a6b0e Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 4 Apr 2021 11:00:48 +0300 Subject: [PATCH 056/152] Nullable types --- .../external-dictionaries/external-dicts-dict-structure.md | 4 ++-- docs/en/sql-reference/dictionaries/index.md | 2 -- .../external-dictionaries/external-dicts-dict-structure.md | 4 ++-- docs/ru/sql-reference/dictionaries/index.md | 2 -- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index dbf2fa67ac5..05cb7c1c571 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,8 +159,8 @@ Configuration fields: | Tag | Description | Required | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `name` | Column name. | Yes | -| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is not supported. | Yes | -| `null_value` | Default value for a non-existing element.
In the example, it is an empty string. You cannot use `NULL` in this field. | Yes | +| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed) dictionaries. In [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), `Polygon`, [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | +| `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | | `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).
If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.

Default value: `false`. | No | diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index fa127dab103..22f4182a1c0 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -10,8 +10,6 @@ A dictionary is a mapping (`key -> attributes`) that is convenient for various t ClickHouse supports special functions for working with dictionaries that can be used in queries. It is easier and more efficient to use dictionaries with functions than a `JOIN` with reference tables. -[NULL](../../sql-reference/syntax.md#null-literal) values can’t be stored in a dictionary. - ClickHouse supports: - [Built-in dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 57f53390d1c..6c17efd0edb 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,8 +159,8 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`. [Nullable](../../../sql-reference/data-types/nullable.md) не поддерживается. | Да | -| `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Нельзя указать значение `NULL`. | Да | +| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), `Polygon` и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | +| `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Default value: `false`. | No | | `is_object_id` | Признак того, что запрос выполняется к документу MongoDB по `ObjectID`.

Значение по умолчанию: `false`. | Нет | diff --git a/docs/ru/sql-reference/dictionaries/index.md b/docs/ru/sql-reference/dictionaries/index.md index bd432497be8..59c7518d0c5 100644 --- a/docs/ru/sql-reference/dictionaries/index.md +++ b/docs/ru/sql-reference/dictionaries/index.md @@ -10,8 +10,6 @@ toc_title: "Введение" ClickHouse поддерживает специальные функции для работы со словарями, которые можно использовать в запросах. Проще и эффективнее использовать словари с помощью функций, чем `JOIN` с таблицами-справочниками. -В словаре нельзя хранить значения [NULL](../../sql-reference/syntax.md#null-literal). - ClickHouse поддерживает: - [Встроенные словари](internal-dicts.md#internal_dicts) со специфическим [набором функций](../../sql-reference/dictionaries/external-dictionaries/index.md). From 548ec6d2cbcda4d5019e552ef712aa5c11c3355c Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 4 Apr 2021 11:02:09 +0300 Subject: [PATCH 057/152] Trying enabled setting --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 06d909eb912..aee25ee21a0 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -54,7 +54,7 @@ struct Settings; M(UInt64, write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Amount of bytes, accumulated in WAL to do fsync.", 0) \ M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \ - M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ + M(UInt64, non_replicated_deduplication_window, 100, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ From b590926c1ca9503cfcc92deccb9b1aa1f2839d5f Mon Sep 17 00:00:00 2001 From: fuqi Date: Sun, 4 Apr 2021 17:23:47 +0800 Subject: [PATCH 058/152] fix dist map type select --- src/Parsers/ASTFunction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 3cb2e8bfa37..c5f2a3034cd 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -483,14 +483,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "map")) { - settings.ostr << (settings.hilite ? hilite_operator : "") << '{' << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_operator : "") << 'map(' << (settings.hilite ? hilite_none : ""); for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << ", "; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } - settings.ostr << (settings.hilite ? hilite_operator : "") << '}' << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? 
hilite_none : ""); written = true; } } From 0d075622cb230e64fd1add0d4bf58d0a8ec8ecf5 Mon Sep 17 00:00:00 2001 From: olgarev <56617294+olgarev@users.noreply.github.com> Date: Sun, 4 Apr 2021 12:37:38 +0300 Subject: [PATCH 059/152] Update docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- .../external-dictionaries/external-dicts-dict-structure.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 6c17efd0edb..9728f3eb29a 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -162,10 +162,9 @@ CREATE DICTIONARY somename ( | `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), `Polygon` и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | -| `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Default value: `false`. | No | +| `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет | | `is_object_id` | Признак того, что запрос выполняется к документу MongoDB по `ObjectID`.

Значение по умолчанию: `false`. | Нет | ## Смотрите также {#smotrite-takzhe} - [Функции для работы с внешними словарями](../../../sql-reference/functions/ext-dict-functions.md). - From 3fc988f6e6df3777edc41e66ad77d2e2f2727706 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 4 Apr 2021 12:39:04 +0300 Subject: [PATCH 060/152] Polygon link added --- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 05cb7c1c571..b787af003f1 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ Configuration fields: | Tag | Description | Required | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `name` | Column name. | Yes | -| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed) dictionaries. In [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), `Polygon`, [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | +| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed) dictionaries. In [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), [Polygon](external-dicts-dict-polygon.md), [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | | `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 9728f3eb29a..a65e967d1ce 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), `Polygon` и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | +| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), [Polygon](external-dicts-dict-polygon.md) и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет | From da3f973eac9d4ce9eca0ded54eb12a72bea98a46 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 4 Apr 2021 12:41:30 +0300 Subject: [PATCH 061/152] Minor fixes --- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- .../external-dictionaries/external-dicts-dict-structure.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index b787af003f1..02f0792ca77 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -166,7 +166,7 @@ Configuration fields: | `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).
If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.

Default value: `false`. | No | | `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.

Default value: `false`. | No | -## See Also {#see-also} +**See Also** - [Functions for working with external dictionaries](../../../sql-reference/functions/ext-dict-functions.md). diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index a65e967d1ce..85126bda9c4 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -3,7 +3,7 @@ toc_priority: 44 toc_title: "Ключ и поля словаря" --- -# Ключ и поля словаря {#kliuch-i-polia-slovaria} +# Ключ и поля словаря {#dictionary-key-and-fields} Секция `` описывает ключ словаря и поля, доступные для запросов. @@ -88,7 +88,7 @@ PRIMARY KEY Id - `PRIMARY KEY` – имя столбца с ключами. -### Составной ключ {#sostavnoi-kliuch} +### Составной ключ {#composite-key} Ключом может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external-dicts-dict-layout.md) должен быть `complex_key_hashed` или `complex_key_cache`. @@ -165,6 +165,6 @@ CREATE DICTIONARY somename ( | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет | | `is_object_id` | Признак того, что запрос выполняется к документу MongoDB по `ObjectID`.

Значение по умолчанию: `false`. | Нет | -## Смотрите также {#smotrite-takzhe} +**Смотрите также** - [Функции для работы с внешними словарями](../../../sql-reference/functions/ext-dict-functions.md). From 3f33ab064e752e79cac99977af04ec572aa64a4c Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sun, 4 Apr 2021 12:47:33 +0300 Subject: [PATCH 062/152] Added Polygon NULL support from #21890 --- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 02f0792ca77..f22d2a0b59e 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ Configuration fields: | Tag | Description | Required | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `name` | Column name. | Yes | -| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed) dictionaries. In [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), [Polygon](external-dicts-dict-polygon.md), [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | +| `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md) dictionaries. In [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | | `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 85126bda9c4..609ee225ce2 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache), [Polygon](external-dicts-dict-polygon.md) и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | +| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md). Для словарей [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) и [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет | From 043bd11d2eaa8aeee63e28b67c465fa9c9e8da13 Mon Sep 17 00:00:00 2001 From: fuqi Date: Sun, 4 Apr 2021 18:00:30 +0800 Subject: [PATCH 063/152] fix dist map type select --- src/Parsers/ASTFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index c5f2a3034cd..4a68e8fde92 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -483,7 +483,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "map")) { - settings.ostr << (settings.hilite ? hilite_operator : "") << 'map(' << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_operator : "") << "map(" << (settings.hilite ? hilite_none : ""); for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) From ff86c21e65385405d80b009379608d972751b451 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 2 Apr 2021 23:16:04 +0300 Subject: [PATCH 064/152] Dictionary update field fix --- src/Common/HashTable/HashMap.h | 19 +- src/Dictionaries/DictionaryHelpers.h | 102 +++++++ src/Dictionaries/FlatDictionary.cpp | 275 +++++++----------- src/Dictionaries/FlatDictionary.h | 30 +- src/Dictionaries/HashedDictionary.cpp | 83 +----- src/Dictionaries/HashedDictionary.h | 7 +- .../01785_dictionary_element_count.reference | 8 + .../01785_dictionary_element_count.sql | 91 ++++++ tests/queries/skip_list.json | 3 +- 9 files changed, 353 insertions(+), 265 deletions(-) create mode 100644 tests/queries/0_stateless/01785_dictionary_element_count.reference create mode 100644 tests/queries/0_stateless/01785_dictionary_element_count.sql diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index c3cd09eccb2..466943c698b 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -22,19 +22,28 @@ struct PairNoInit First first; Second second; - PairNoInit() {} + PairNoInit() = default; - template - PairNoInit(First_ && first_, NoInitTag) : first(std::forward(first_)) + template + PairNoInit(FirstValue && first_, NoInitTag) + : first(std::forward(first_)) { } - template - PairNoInit(First_ && first_, Second_ && second_) : first(std::forward(first_)), second(std::forward(second_)) + template + PairNoInit(FirstValue && first_, SecondValue && second_) + : first(std::forward(first_)) + , second(std::forward(second_)) { } }; +template +PairNoInit, std::decay_t> makePairNoInit(First && first, Second && second) +{ + return PairNoInit, std::decay_t>(std::forward(first), std::forward(second)); +} + template struct HashMapCell diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 3e7063bb9ef..d68c634fc2d 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -1,11 +1,14 @@ #pragma once #include +#include #include #include #include #include +#include #include +#include #include #include @@ -416,6 +419,105 @@ private: Arena * complex_key_arena; }; +/** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out. + * In result block_to_update will be merged with each block readed from stream. + * Note: readPrefix readImpl readSuffix will be called on stream object during function execution. 
+ */ +template +void mergeBlockWithStream( + size_t key_column_size [[maybe_unused]], + Block & block_to_update [[maybe_unused]], + BlockInputStreamPtr & stream [[maybe_unused]]) +{ + using KeyType = std::conditional_t; + static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by updatePreviousyLoadedBlockWithStream"); + + Columns saved_block_key_columns; + saved_block_key_columns.reserve(key_column_size); + + /// Split into keys columns and attribute columns + for (size_t i = 0; i < key_column_size; ++i) + saved_block_key_columns.emplace_back(block_to_update.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor saved_keys_extractor(saved_block_key_columns, arena_holder.getComplexKeyArena()); + auto saved_keys_extracted_from_block = saved_keys_extractor.extractAllKeys(); + + IColumn::Filter filter(saved_keys_extracted_from_block.size(), true); + + HashMap saved_key_to_index; + saved_key_to_index.reserve(saved_keys_extracted_from_block.size()); + + size_t indexes_to_remove_count = 0; + + for (size_t i = 0; i < saved_keys_extracted_from_block.size(); ++i) + { + auto saved_key = saved_keys_extracted_from_block[i]; + auto [it, was_inserted] = saved_key_to_index.insert(makePairNoInit(saved_key, i)); + + if (!was_inserted) + { + size_t index_to_remove = it->getMapped(); + filter[index_to_remove] = false; + it->getMapped() = i; + ++indexes_to_remove_count; + } + } + + auto result_fetched_columns = block_to_update.cloneEmptyColumns(); + + stream->readPrefix(); + + while (Block block = stream->read()) + { + Columns block_key_columns; + block_key_columns.reserve(key_column_size); + + /// Split into keys columns and attribute columns + for (size_t i = 0; i < key_column_size; ++i) + block_key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysExtractor update_keys_extractor(block_key_columns, arena_holder.getComplexKeyArena()); + PaddedPODArray update_keys = update_keys_extractor.extractAllKeys(); + + for (auto update_key : update_keys) + { + const auto * it = saved_key_to_index.find(update_key); + if (it != nullptr) + { + size_t index_to_filter = it->getMapped(); + filter[index_to_filter] = false; + ++indexes_to_remove_count; + } + } + + size_t rows = block.rows(); + + for (size_t column_index = 0; column_index < block.columns(); ++column_index) + { + const auto update_column = block.safeGetByPosition(column_index).column; + MutableColumnPtr & result_fetched_column = result_fetched_columns[column_index]; + + result_fetched_column->insertRangeFrom(*update_column, 0, rows); + } + } + + stream->readSuffix(); + + size_t result_fetched_rows = result_fetched_columns.front()->size(); + size_t filter_hint = filter.size() - indexes_to_remove_count; + + for (size_t column_index = 0; column_index < block_to_update.columns(); ++column_index) + { + auto & column = block_to_update.getByPosition(column_index).column; + column = column->filter(filter, filter_hint); + + MutableColumnPtr mutable_column = column->assumeMutable(); + const IColumn & fetched_column = *result_fetched_columns[column_index]; + mutable_column->insertRangeFrom(fetched_column, 0, result_fetched_rows); + } +} + /** * Returns ColumnVector data as PaddedPodArray. 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 2d8d208d76b..def5ae75690 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -32,14 +32,14 @@ FlatDictionary::FlatDictionary( DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, bool require_nonempty_, - BlockPtr saved_block_) + BlockPtr previously_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} , dict_lifetime(dict_lifetime_) , require_nonempty(require_nonempty_) - , loaded_ids(initial_array_size, false) - , saved_block{std::move(saved_block_)} + , loaded_keys(initial_array_size, false) + , previously_loaded_block(std::move(previously_loaded_block_)) { createAttributes(); loadData(); @@ -126,20 +126,19 @@ ColumnPtr FlatDictionary::getColumn( ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const { PaddedPODArray backup_storage; - const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage); + const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage); + size_t keys_size = keys.size(); - auto result = ColumnUInt8::create(ext::size(ids)); - auto& out = result->getData(); + auto result = ColumnUInt8::create(keys_size); + auto & out = result->getData(); - const auto ids_count = ext::size(ids); - - for (const auto i : ext::range(0, ids_count)) + for (size_t key_index = 0; key_index < keys_size; ++key_index) { - const auto id = ids[i]; - out[i] = id < loaded_ids.size() && loaded_ids[id]; + const auto key = keys[key_index]; + out[key_index] = key < loaded_keys.size() && loaded_keys[key]; } - query_count.fetch_add(ids_count, std::memory_order_relaxed); + query_count.fetch_add(keys_size, std::memory_order_relaxed); return result; } @@ -153,22 +152,14 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const UInt64 null_value = std::get(hierarchical_attribute.null_values); - const ContainerType & parent_keys = std::get>(hierarchical_attribute.arrays); + const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&, this](auto & key) - { - return key < loaded_ids.size() && loaded_ids[key]; - }; + auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; }; auto get_parent_key_func = [&, this](auto & hierarchy_key) { - std::optional result; - - if (hierarchy_key >= loaded_ids.size() || !loaded_ids[hierarchy_key]) - return result; - - result = parent_keys[hierarchy_key]; - + bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key]; + std::optional result = is_key_valid ? 
std::make_optional(parent_keys[hierarchy_key]) : std::nullopt; return result; }; @@ -194,22 +185,14 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const UInt64 null_value = std::get(hierarchical_attribute.null_values); - const ContainerType & parent_keys = std::get>(hierarchical_attribute.arrays); + const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&, this](auto & key) - { - return key < loaded_ids.size() && loaded_ids[key]; - }; + auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; }; auto get_parent_key_func = [&, this](auto & hierarchy_key) { - std::optional result; - - if (hierarchy_key >= loaded_ids.size() || !loaded_ids[hierarchy_key]) - return result; - - result = parent_keys[hierarchy_key]; - + bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key]; + std::optional result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt; return result; }; @@ -230,7 +213,7 @@ ColumnPtr FlatDictionary::getDescendants( size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const ContainerType & parent_keys = std::get>(hierarchical_attribute.arrays); + const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); HashMap> parent_to_child; @@ -238,7 +221,7 @@ ColumnPtr FlatDictionary::getDescendants( { auto parent_key = parent_keys[i]; - if (loaded_ids[i]) + if (loaded_keys[i]) parent_to_child[parent_key].emplace_back(static_cast(i)); } @@ -260,22 +243,34 @@ void FlatDictionary::createAttributes() void FlatDictionary::blockToAttributes(const Block & block) { - const IColumn & id_column = *block.safeGetByPosition(0).column; - element_count += id_column.size(); + const auto keys_column = block.safeGetByPosition(0).column; - for (const size_t attribute_idx : ext::range(0, attributes.size())) + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); + auto keys = keys_extractor.extractAllKeys(); + + size_t key_offset = 1; + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) { - const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column; - Attribute & attribute = attributes[attribute_idx]; + const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + key_offset).column; + Attribute & attribute = attributes[attribute_index]; - for (const auto row_idx : ext::range(0, id_column.size())) - setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); + for (size_t i = 0; i < keys.size(); ++i) + { + auto key = keys[i]; + + if (key < loaded_keys.size() && loaded_keys[key]) + continue; + + setAttributeValue(attribute, key, attribute_column[i]); + ++element_count; + } } } void FlatDictionary::updateData() { - if (!saved_block || saved_block->rows() == 0) + if (!previously_loaded_block || previously_loaded_block->rows() == 0) { auto stream = source_ptr->loadUpdatedAll(); stream->readPrefix(); @@ -283,12 +278,13 @@ void FlatDictionary::updateData() while (const auto block = stream->read()) { /// We are using this to keep saved data if input stream consists of multiple blocks - if (!saved_block) - saved_block = std::make_shared(block.cloneEmpty()); - for (const auto 
attribute_idx : ext::range(0, attributes.size() + 1)) + if (!previously_loaded_block) + previously_loaded_block = std::make_shared(block.cloneEmpty()); + + for (size_t column_index = 0; column_index < block.columns(); ++column_index) { - const IColumn & update_column = *block.getByPosition(attribute_idx).column.get(); - MutableColumnPtr saved_column = saved_block->getByPosition(attribute_idx).column->assumeMutable(); + const IColumn & update_column = *block.getByPosition(column_index).column.get(); + MutableColumnPtr saved_column = previously_loaded_block->getByPosition(column_index).column->assumeMutable(); saved_column->insertRangeFrom(update_column, 0, update_column.size()); } } @@ -297,51 +293,14 @@ void FlatDictionary::updateData() else { auto stream = source_ptr->loadUpdatedAll(); - stream->readPrefix(); - - while (Block block = stream->read()) - { - const auto & saved_id_column = *saved_block->safeGetByPosition(0).column; - const auto & update_id_column = *block.safeGetByPosition(0).column; - - std::unordered_map> update_ids; - for (size_t row = 0; row < update_id_column.size(); ++row) - { - const auto id = update_id_column.get64(row); - update_ids[id].push_back(row); - } - - const size_t saved_rows = saved_id_column.size(); - IColumn::Filter filter(saved_rows); - std::unordered_map>::iterator it; - - for (size_t row = 0; row < saved_id_column.size(); ++row) - { - auto id = saved_id_column.get64(row); - it = update_ids.find(id); - - if (it != update_ids.end()) - filter[row] = 0; - else - filter[row] = 1; - } - - auto block_columns = block.mutateColumns(); - for (const auto attribute_idx : ext::range(0, attributes.size() + 1)) - { - auto & column = saved_block->safeGetByPosition(attribute_idx).column; - const auto & filtered_column = column->filter(filter, -1); - - block_columns[attribute_idx]->insertRangeFrom(*filtered_column.get(), 0, filtered_column->size()); - } - - saved_block->setColumns(std::move(block_columns)); - } - stream->readSuffix(); + mergeBlockWithStream( + dict_struct.getKeysSize(), + *previously_loaded_block, + stream); } - if (saved_block) - blockToAttributes(*saved_block.get()); + if (previously_loaded_block) + blockToAttributes(*previously_loaded_block.get()); } void FlatDictionary::loadData() @@ -363,24 +322,6 @@ void FlatDictionary::loadData() throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY}; } - -template -void FlatDictionary::addAttributeSize(const Attribute & attribute) -{ - const auto & array_ref = std::get>(attribute.arrays); - bytes_allocated += sizeof(PaddedPODArray) + array_ref.allocated_bytes(); - bucket_count = array_ref.capacity(); -} - -template <> -void FlatDictionary::addAttributeSize(const Attribute & attribute) -{ - const auto & array_ref = std::get>(attribute.arrays); - bytes_allocated += sizeof(PaddedPODArray) + array_ref.allocated_bytes(); - bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); - bucket_count = array_ref.capacity(); -} - void FlatDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); @@ -391,8 +332,14 @@ void FlatDictionary::calculateBytesAllocated() { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; - addAttributeSize(attribute); + const auto & container = std::get>(attribute.container); + bytes_allocated += sizeof(PaddedPODArray) + container.allocated_bytes(); + bucket_count = container.capacity(); + + if 
constexpr (std::is_same_v) + bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); }; callOnDictionaryAttributeType(attribute.type, type_call); @@ -405,7 +352,7 @@ void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & nu { attribute.null_values = T(null_value.get()); const auto & null_value_ref = std::get(attribute.null_values); - attribute.arrays.emplace>(initial_array_size, null_value_ref); + attribute.container.emplace>(initial_array_size, null_value_ref); } template <> @@ -415,7 +362,7 @@ void FlatDictionary::createAttributeImpl(Attribute & attribute, const Fi const String & string = null_value.get(); const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); attribute.null_values.emplace(string_in_arena, string.size()); - attribute.arrays.emplace>(initial_array_size, StringRef(string_in_arena, string.size())); + attribute.container.emplace>(initial_array_size, StringRef(string_in_arena, string.size())); } FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value) @@ -436,57 +383,64 @@ FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttrib return attr; } - template void FlatDictionary::getItemsImpl( const Attribute & attribute, - const PaddedPODArray & ids, + const PaddedPODArray & keys, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const { - const auto & attr = std::get>(attribute.arrays); - const auto rows = ext::size(ids); + const auto & container = std::get>(attribute.container); + const auto rows = keys.size(); - for (const auto row : ext::range(0, rows)) + for (size_t row = 0; row < rows; ++row) { - const auto id = ids[row]; - set_value(row, id < ext::size(attr) && loaded_ids[id] ? 
static_cast(attr[id]) : default_value_extractor[row]); + const auto key = keys[row]; + + if (key < loaded_keys.size() && loaded_keys[key]) + set_value(row, static_cast(container[key])); + else + set_value(row, default_value_extractor[row]); } query_count.fetch_add(rows, std::memory_order_relaxed); } template -void FlatDictionary::resize(Attribute & attribute, const UInt64 id) +void FlatDictionary::resize(Attribute & attribute, UInt64 key) { - if (id >= max_array_size) - throw Exception{full_name + ": identifier should be less than " + toString(max_array_size), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + if (key >= max_array_size) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "({}): identifier should be less than ({})", + full_name, + toString(max_array_size)); - auto & array = std::get>(attribute.arrays); - if (id >= array.size()) + auto & container = std::get>(attribute.container); + + if (key >= container.size()) { - const size_t elements_count = id + 1; //id=0 -> elements_count=1 - loaded_ids.resize(elements_count, false); - array.resize_fill(elements_count, std::get(attribute.null_values)); + const size_t elements_count = key + 1; //id=0 -> elements_count=1 + loaded_keys.resize(elements_count, false); + container.resize_fill(elements_count, std::get(attribute.null_values)); } } template -void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const T & value) +void FlatDictionary::setAttributeValueImpl(Attribute & attribute, UInt64 key, const T & value) { - auto & array = std::get>(attribute.arrays); - array[id] = value; - loaded_ids[id] = true; + auto & array = std::get>(attribute.container); + array[key] = value; + loaded_keys[key] = true; } template <> -void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const String & value) +void FlatDictionary::setAttributeValueImpl(Attribute & attribute, UInt64 key, const String & value) { const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size()); - setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()}); + setAttributeValueImpl(attribute, key, StringRef{string_in_arena, value.size()}); } -void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, const Field & value) +void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, const Field & value) { auto type_call = [&](const auto &dictionary_attribute_type) { @@ -494,44 +448,36 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, c using AttributeType = typename Type::AttributeType; using ResizeType = std::conditional_t, StringRef, AttributeType>; - resize(attribute, id); + resize(attribute, key); if (attribute.nullable_set) { if (value.isNull()) { - attribute.nullable_set->insert(id); - loaded_ids[id] = true; + attribute.nullable_set->insert(key); + loaded_keys[key] = true; return; } - else - { - attribute.nullable_set->erase(id); - } } - setAttributeValueImpl(attribute, id, value.get()); + setAttributeValueImpl(attribute, key, value.get()); }; callOnDictionaryAttributeType(attribute.type, type_call); } -PaddedPODArray FlatDictionary::getIds() const -{ - const auto ids_count = ext::size(loaded_ids); - - PaddedPODArray ids; - ids.reserve(ids_count); - - for (auto idx : ext::range(0, ids_count)) - if (loaded_ids[idx]) - ids.push_back(idx); - return ids; -} - BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const { - return 
std::make_shared(shared_from_this(), max_block_size, getIds(), column_names); + const auto keys_count = loaded_keys.size(); + + PaddedPODArray keys; + keys.reserve(keys_count); + + for (size_t key_index = 0; key_index < keys_count; ++key_index) + if (loaded_keys[key_index]) + keys.push_back(key_index); + + return std::make_shared(shared_from_this(), max_block_size, std::move(keys), column_names); } void registerDictionaryFlat(DictionaryFactory & factory) @@ -543,19 +489,20 @@ void registerDictionaryFlat(DictionaryFactory & factory) DictionarySourcePtr source_ptr) -> DictionaryPtr { if (dict_struct.key) - throw Exception{"'key' is not supported for dictionary of layout 'flat'", ErrorCodes::UNSUPPORTED_METHOD}; + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for dictionary of layout 'flat'"); if (dict_struct.range_min || dict_struct.range_max) - throw Exception{full_name - + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "({}): elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + full_name); const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; + factory.registerLayout("flat", create_layout, false); } diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 09721bf1a99..b32d211e15c 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -32,7 +32,7 @@ public: DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, bool require_nonempty_, - BlockPtr saved_block_ = nullptr); + BlockPtr previously_loaded_block_ = nullptr); std::string getTypeName() const override { return "Flat"; } @@ -48,7 +48,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block); + return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, previously_loaded_block); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -133,7 +133,7 @@ private: ContainerType, ContainerType, ContainerType> - arrays; + container; std::unique_ptr string_arena; }; @@ -143,9 +143,6 @@ private: void updateData(); void loadData(); - template - void addAttributeSize(const Attribute & attribute); - void calculateBytesAllocated(); template @@ -156,41 +153,32 @@ private: template void getItemsImpl( const Attribute & attribute, - const PaddedPODArray & ids, + const PaddedPODArray & keys, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; template - void resize(Attribute & attribute, const UInt64 id); + void resize(Attribute & attribute, UInt64 key); template - void setAttributeValueImpl(Attribute & attribute, const UInt64 id, const T & value); + void setAttributeValueImpl(Attribute & attribute, UInt64 key, const T & value); - void setAttributeValue(Attribute & attribute, const UInt64 id, const Field & value); - - const Attribute & getAttribute(const std::string & attribute_name) const; - - 
template - void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; - - PaddedPODArray getIds() const; + void setAttributeValue(Attribute & attribute, UInt64 key, const Field & value); const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; const bool require_nonempty; - std::map attribute_index_by_name; std::vector attributes; - std::vector loaded_ids; + std::vector loaded_keys; size_t bytes_allocated = 0; size_t element_count = 0; size_t bucket_count = 0; mutable std::atomic query_count{0}; - /// TODO: Remove - BlockPtr saved_block; + BlockPtr previously_loaded_block; }; } diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 3fbab4c3e39..aebaffca1f1 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -1,9 +1,5 @@ #include "HashedDictionary.h" -#include - -#include - #include #include #include @@ -46,13 +42,13 @@ HashedDictionary::HashedDictionary( DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, bool require_nonempty_, - BlockPtr saved_block_) + BlockPtr previously_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) , require_nonempty(require_nonempty_) - , saved_block(std::move(saved_block_)) + , previously_loaded_block(std::move(previously_loaded_block_)) { createAttributes(); loadData(); @@ -347,7 +343,7 @@ void HashedDictionary::createAttributes() template void HashedDictionary::updateData() { - if (!saved_block || saved_block->rows() == 0) + if (!previously_loaded_block || previously_loaded_block->rows() == 0) { auto stream = source_ptr->loadUpdatedAll(); stream->readPrefix(); @@ -355,13 +351,13 @@ void HashedDictionary::updateData() while (const auto block = stream->read()) { /// We are using this to keep saved data if input stream consists of multiple blocks - if (!saved_block) - saved_block = std::make_shared(block.cloneEmpty()); + if (!previously_loaded_block) + previously_loaded_block = std::make_shared(block.cloneEmpty()); for (const auto attribute_idx : ext::range(0, attributes.size() + 1)) { const IColumn & update_column = *block.getByPosition(attribute_idx).column.get(); - MutableColumnPtr saved_column = saved_block->getByPosition(attribute_idx).column->assumeMutable(); + MutableColumnPtr saved_column = previously_loaded_block->getByPosition(attribute_idx).column->assumeMutable(); saved_column->insertRangeFrom(update_column, 0, update_column.size()); } } @@ -369,70 +365,17 @@ void HashedDictionary::updateData() } else { - size_t skip_keys_size_offset = dict_struct.getKeysSize(); - - Columns saved_block_key_columns; - saved_block_key_columns.reserve(skip_keys_size_offset); - - /// Split into keys columns and attribute columns - for (size_t i = 0; i < skip_keys_size_offset; ++i) - saved_block_key_columns.emplace_back(saved_block->safeGetByPosition(i).column); - - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor saved_keys_extractor(saved_block_key_columns, arena_holder.getComplexKeyArena()); - auto saved_keys_extracted_from_block = saved_keys_extractor.extractAllKeys(); - auto stream = source_ptr->loadUpdatedAll(); - stream->readPrefix(); - - while (Block block = stream->read()) - { - /// TODO: Rewrite - Columns block_key_columns; - block_key_columns.reserve(skip_keys_size_offset); - - /// Split into keys columns and attribute columns - for (size_t i = 
0; i < skip_keys_size_offset; ++i) - block_key_columns.emplace_back(block.safeGetByPosition(i).column); - - DictionaryKeysExtractor block_keys_extractor(saved_block_key_columns, arena_holder.getComplexKeyArena()); - auto keys_extracted_from_block = block_keys_extractor.extractAllKeys(); - - absl::flat_hash_map, DefaultHash> update_keys; - for (size_t row = 0; row < keys_extracted_from_block.size(); ++row) - { - auto key = keys_extracted_from_block[row]; - update_keys[key].push_back(row); - } - - IColumn::Filter filter(saved_keys_extracted_from_block.size()); - - for (size_t row = 0; row < saved_keys_extracted_from_block.size(); ++row) - { - auto key = saved_keys_extracted_from_block[row]; - auto it = update_keys.find(key); - filter[row] = (it == update_keys.end()); - } - - auto block_columns = block.mutateColumns(); - for (const auto attribute_idx : ext::range(0, attributes.size() + 1)) - { - auto & column = saved_block->safeGetByPosition(attribute_idx).column; - const auto & filtered_column = column->filter(filter, -1); - block_columns[attribute_idx]->insertRangeFrom(*filtered_column.get(), 0, filtered_column->size()); - } - - saved_block->setColumns(std::move(block_columns)); - } - - stream->readSuffix(); + mergeBlockWithStream( + dict_struct.getKeysSize(), + *previously_loaded_block, + stream); } - if (saved_block) + if (previously_loaded_block) { - resize(saved_block->rows()); - blockToAttributes(*saved_block.get()); + resize(previously_loaded_block->rows()); + blockToAttributes(*previously_loaded_block.get()); } } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3882b669324..bc66d10d7fa 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -41,7 +41,7 @@ public: DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, bool require_nonempty_, - BlockPtr saved_block_ = nullptr); + BlockPtr previously_loaded_block_ = nullptr); std::string getTypeName() const override { @@ -67,7 +67,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block); + return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, previously_loaded_block); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -219,8 +219,7 @@ private: size_t bucket_count = 0; mutable std::atomic query_count{0}; - /// TODO: Remove - BlockPtr saved_block; + BlockPtr previously_loaded_block; Arena complex_key_arena; }; diff --git a/tests/queries/0_stateless/01785_dictionary_element_count.reference b/tests/queries/0_stateless/01785_dictionary_element_count.reference new file mode 100644 index 00000000000..4b79788b4d4 --- /dev/null +++ b/tests/queries/0_stateless/01785_dictionary_element_count.reference @@ -0,0 +1,8 @@ +1 First +simple_key_flat_dictionary 01785_db 1 +1 First +simple_key_hashed_dictionary 01785_db 1 +1 First +simple_key_cache_dictionary 01785_db 1 +1 FirstKey First +complex_key_hashed_dictionary 01785_db 1 diff --git a/tests/queries/0_stateless/01785_dictionary_element_count.sql b/tests/queries/0_stateless/01785_dictionary_element_count.sql new file mode 100644 index 00000000000..6db65152a56 --- /dev/null +++ b/tests/queries/0_stateless/01785_dictionary_element_count.sql @@ -0,0 +1,91 @@ +DROP DATABASE IF EXISTS 01785_db; +CREATE DATABASE 01785_db; + +DROP TABLE IF EXISTS 01785_db.simple_key_source_table; +CREATE TABLE 
01785_db.simple_key_source_table +( + id UInt64, + value String +) ENGINE = TinyLog(); + +INSERT INTO 01785_db.simple_key_source_table VALUES (1, 'First'); +INSERT INTO 01785_db.simple_key_source_table VALUES (1, 'First'); + +DROP DICTIONARY IF EXISTS 01785_db.simple_key_flat_dictionary; +CREATE DICTIONARY 01785_db.simple_key_flat_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB '01785_db' TABLE 'simple_key_source_table')) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +SELECT * FROM 01785_db.simple_key_flat_dictionary; +SELECT name, database, element_count FROM system.dictionaries WHERE database = '01785_db' AND name = 'simple_key_flat_dictionary'; + +DROP DICTIONARY 01785_db.simple_key_flat_dictionary; + +CREATE DICTIONARY 01785_db.simple_key_hashed_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB '01785_db' TABLE 'simple_key_source_table')) +LAYOUT(HASHED()) +LIFETIME(MIN 0 MAX 1000); + +SELECT * FROM 01785_db.simple_key_hashed_dictionary; +SELECT name, database, element_count FROM system.dictionaries WHERE database = '01785_db' AND name = 'simple_key_hashed_dictionary'; + +DROP DICTIONARY 01785_db.simple_key_hashed_dictionary; + +CREATE DICTIONARY 01785_db.simple_key_cache_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB '01785_db' TABLE 'simple_key_source_table')) +LAYOUT(CACHE(SIZE_IN_CELLS 100000)) +LIFETIME(MIN 0 MAX 1000); + +SELECT toUInt64(1) as key, dictGet('01785_db.simple_key_cache_dictionary', 'value', key); +SELECT name, database, element_count FROM system.dictionaries WHERE database = '01785_db' AND name = 'simple_key_cache_dictionary'; + +DROP DICTIONARY 01785_db.simple_key_cache_dictionary; + +DROP TABLE 01785_db.simple_key_source_table; + +DROP TABLE IF EXISTS 01785_db.complex_key_source_table; +CREATE TABLE 01785_db.complex_key_source_table +( + id UInt64, + id_key String, + value String +) ENGINE = TinyLog(); + +INSERT INTO 01785_db.complex_key_source_table VALUES (1, 'FirstKey', 'First'); +INSERT INTO 01785_db.complex_key_source_table VALUES (1, 'FirstKey', 'First'); + +CREATE DICTIONARY 01785_db.complex_key_hashed_dictionary +( + id UInt64, + id_key String, + value String +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB '01785_db' TABLE 'complex_key_source_table')) +LAYOUT(COMPLEX_KEY_HASHED()) +LIFETIME(MIN 0 MAX 1000); + +SELECT * FROM 01785_db.complex_key_hashed_dictionary; +SELECT name, database, element_count FROM system.dictionaries WHERE database = '01785_db' AND name = 'complex_key_hashed_dictionary'; + +DROP DICTIONARY 01785_db.complex_key_hashed_dictionary; + +DROP TABLE 01785_db.complex_key_source_table; + +DROP DATABASE 01785_db; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 90dbf3e65d8..39613971f3e 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -696,6 +696,7 @@ "01760_system_dictionaries", "01760_polygon_dictionaries", "01778_hierarchical_dictionaries", - "01780_clickhouse_dictionary_source_loop" + "01780_clickhouse_dictionary_source_loop", + "01785_dictionary_element_count" ] } From 41ef9291f1bf2fe46dc91d0a8406ef981e8a132e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 3 Apr 2021 15:47:01 +0300 Subject: [PATCH 065/152] Fixed tests --- src/Dictionaries/FlatDictionary.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index def5ae75690..d5c40436aec 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -249,6 +250,8 @@ void FlatDictionary::blockToAttributes(const Block & block) DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); auto keys = keys_extractor.extractAllKeys(); + HashSet already_processed_keys; + size_t key_offset = 1; for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) { @@ -259,12 +262,15 @@ void FlatDictionary::blockToAttributes(const Block & block) { auto key = keys[i]; - if (key < loaded_keys.size() && loaded_keys[key]) + if (already_processed_keys.find(key) != nullptr) continue; + already_processed_keys.insert(key); setAttributeValue(attribute, key, attribute_column[i]); ++element_count; } + + already_processed_keys.clear(); } } From 027ca2484e85751326f3dee9ecdc9cdaad893d0a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 4 Apr 2021 16:30:01 +0300 Subject: [PATCH 066/152] FlatDictionary added layout options initial_array_size, max_array_size --- src/Dictionaries/DictionaryHelpers.h | 2 +- src/Dictionaries/FlatDictionary.cpp | 76 ++++++++++++++------------- src/Dictionaries/FlatDictionary.h | 18 ++++--- tests/performance/flat_dictionary.xml | 14 ++--- 4 files changed, 58 insertions(+), 52 deletions(-) diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index d68c634fc2d..86b1d505e72 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -420,7 +420,7 @@ private: }; /** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out. - * In result block_to_update will be merged with each block readed from stream. + * In result block_to_update will be merged with blocks from stream. * Note: readPrefix readImpl readSuffix will be called on stream object during function execution. 
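+  * For example: if block_to_update and a block read from the stream both contain a row
+  * for the same key, the stale row is dropped from block_to_update and the fresher row
+  * from the stream is kept (this mirrors the per-dictionary merge logic this helper replaces).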
*/ template diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index d5c40436aec..bd2b987eb7c 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -24,22 +24,19 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -static const auto initial_array_size = 1024; -static const auto max_array_size = 500000; - FlatDictionary::FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_, + Configuration configuration_, BlockPtr previously_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} , dict_lifetime(dict_lifetime_) - , require_nonempty(require_nonempty_) - , loaded_keys(initial_array_size, false) + , configuration(configuration_) + , loaded_keys(configuration.initial_array_size, false) , previously_loaded_block(std::move(previously_loaded_block_)) { createAttributes(); @@ -324,7 +321,7 @@ void FlatDictionary::loadData() else updateData(); - if (require_nonempty && 0 == element_count) + if (configuration.require_nonempty && 0 == element_count) throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY}; } @@ -352,41 +349,34 @@ void FlatDictionary::calculateBytesAllocated() } } - -template -void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) +FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute, const Field & null_value) { - attribute.null_values = T(null_value.get()); - const auto & null_value_ref = std::get(attribute.null_values); - attribute.container.emplace>(initial_array_size, null_value_ref); -} + auto nullable_set = dictionary_attribute.is_nullable ? std::make_optional() : std::optional{}; + Attribute attribute{dictionary_attribute.underlying_type, std::move(nullable_set), {}, {}, {}}; -template <> -void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) -{ - attribute.string_arena = std::make_unique(); - const String & string = null_value.get(); - const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - attribute.null_values.emplace(string_in_arena, string.size()); - attribute.container.emplace>(initial_array_size, StringRef(string_in_arena, string.size())); -} - -FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value) -{ - auto nullable_set = attribute.is_nullable ? 
std::make_optional() : std::optional{}; - Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}}; - - auto type_call = [&](const auto &dictionary_attribute_type) + auto type_call = [&](const auto & dictionary_attribute_type) { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; - createAttributeImpl(attr, null_value); + if constexpr (std::is_same_v) + { + attribute.string_arena = std::make_unique(); + const String & string = null_value.get(); + const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); + attribute.null_values.emplace(string_in_arena, string.size()); + } + else + attribute.null_values = ValueType(null_value.get>()); + + const auto & null_value_ref = std::get(attribute.null_values); + attribute.container.emplace>(configuration.initial_array_size, null_value_ref); }; - callOnDictionaryAttributeType(attribute.underlying_type, type_call); + callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); - return attr; + return attribute; } template @@ -415,11 +405,11 @@ void FlatDictionary::getItemsImpl( template void FlatDictionary::resize(Attribute & attribute, UInt64 key) { - if (key >= max_array_size) + if (key >= configuration.max_array_size) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "({}): identifier should be less than ({})", full_name, - toString(max_array_size)); + toString(configuration.max_array_size)); auto & container = std::get>(attribute.container); @@ -503,10 +493,22 @@ void registerDictionaryFlat(DictionaryFactory & factory) "for a dictionary of layout 'range_hashed'", full_name); + static constexpr size_t default_initial_array_size = 1024; + static constexpr size_t default_max_array_size = 500000; + + String dictionary_layout_prefix = config_prefix + ".layout" + ".flat"; + + FlatDictionary::Configuration configuration + { + .initial_array_size = config.getUInt64(dictionary_layout_prefix + ".initial_array_size", default_initial_array_size), + .max_array_size = config.getUInt64(dictionary_layout_prefix + ".max_array_size", default_max_array_size), + .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false) + }; + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + + return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, std::move(configuration)); }; factory.registerLayout("flat", create_layout, false); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index b32d211e15c..27d361da1e1 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -26,12 +26,19 @@ namespace DB class FlatDictionary final : public IDictionary { public: + struct Configuration + { + size_t initial_array_size; + size_t max_array_size; + bool require_nonempty; + }; + FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_, + Configuration configuration_, BlockPtr previously_loaded_block_ = nullptr); std::string getTypeName() const override { return "Flat"; } @@ -48,7 +55,7 @@ public: std::shared_ptr clone() 
const override
     {
-        return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, previously_loaded_block);
+        return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, previously_loaded_block);
     }

     const IDictionarySource * getSource() const override { return source_ptr.get(); }
@@ -145,10 +152,7 @@ private:

     void calculateBytesAllocated();

-    template
-    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
-
-    static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
+    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);

     template
     void getItemsImpl(
@@ -168,7 +172,7 @@ private:
     const DictionaryStructure dict_struct;
     const DictionarySourcePtr source_ptr;
     const DictionaryLifetime dict_lifetime;
-    const bool require_nonempty;
+    const Configuration configuration;

     std::vector attributes;
     std::vector loaded_keys;
diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml
index 426aa929bbc..f039173a777 100644
--- a/tests/performance/flat_dictionary.xml
+++ b/tests/performance/flat_dictionary.xml
@@ -21,7 +21,7 @@
         )
         PRIMARY KEY id
         SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_flat_dictionary_source_table'))
-        LAYOUT(FLAT())
+        LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
         LIFETIME(MIN 0 MAX 1000)

@@ -29,7 +29,7 @@
         INSERT INTO simple_key_flat_dictionary_source_table
         SELECT number, number, toString(number), toDecimal64(number, 8), toString(number)
         FROM system.numbers
-        LIMIT 500000;
+        LIMIT 5000000;

@@ -46,10 +46,10 @@
         elements_count
-            250000
-            500000
-            750000
-            1000000
+            2500000
+            5000000
+            7500000
+            10000000

@@ -58,7 +58,7 @@
     SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, number)
     FROM system.numbers
     LIMIT {elements_count}
-    FORMAR Null;
+    FORMAT Null;

From f298375f075bd23e0d8031682491e69e4ac0c0a8 Mon Sep 17 00:00:00 2001
From: Olga Revyakina
Date: Sun, 4 Apr 2021 20:33:32 +0300
Subject: [PATCH 067/152] Supported range

---
 .../en/sql-reference/data-types/datetime64.md | 30 ++++++++--------
 .../ru/sql-reference/data-types/datetime64.md | 34 ++++++++++---------
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md
index 5cba8315090..32cdbb8aaa9 100644
--- a/docs/en/sql-reference/data-types/datetime64.md
+++ b/docs/en/sql-reference/data-types/datetime64.md
@@ -9,7 +9,7 @@ Allows to store an instant in time, that can be expressed as a calendar date and
 Tick size (precision): 10-precision seconds

-Syntax:
+**Syntax:**

 ``` sql
 DateTime64(precision, [timezone])
@@ -17,9 +17,11 @@ DateTime64(precision, [timezone])

 Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md).

+The supported range is from January 1, 1925 to December 31, 2238.
+ ## Examples {#examples} -**1.** Creating a table with `DateTime64`-type column and inserting data into it: +1. Creating a table with `DateTime64`-type column and inserting data into it: ``` sql CREATE TABLE dt @@ -27,15 +29,15 @@ CREATE TABLE dt `timestamp` DateTime64(3, 'Europe/Moscow'), `event_id` UInt8 ) -ENGINE = TinyLog +ENGINE = TinyLog; ``` ``` sql -INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2) +INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2); ``` ``` sql -SELECT * FROM dt +SELECT * FROM dt; ``` ``` text @@ -45,13 +47,13 @@ SELECT * FROM dt └─────────────────────────┴──────────┘ ``` -- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Europe/Moscow` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'` +- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Europe/Moscow` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'`. - When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Europe/Moscow` timezone and stored as `1546290000000`. -**2.** Filtering on `DateTime64` values +2. Filtering on `DateTime64` values ``` sql -SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') +SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow'); ``` ``` text @@ -60,12 +62,12 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europ └─────────────────────────┴──────────┘ ``` -Unlike `DateTime`, `DateTime64` values are not converted from `String` automatically +Unlike `DateTime`, `DateTime64` values are not converted from `String` automatically. -**3.** Getting a time zone for a `DateTime64`-type value: +3. Getting a time zone for a `DateTime64`-type value: ``` sql -SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x +SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x; ``` ``` text @@ -74,13 +76,13 @@ SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS └─────────────────────────┴────────────────────────────────┘ ``` -**4.** Timezone conversion +4. Timezone conversion ``` sql SELECT toDateTime64(timestamp, 3, 'Europe/London') as lon_time, toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time -FROM dt +FROM dt; ``` ``` text @@ -90,7 +92,7 @@ FROM dt └─────────────────────────┴─────────────────────────┘ ``` -## See Also {#see-also} +**See Also** - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index 6576bf9dc0d..ae478045e73 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -7,9 +7,9 @@ toc_title: DateTime64 Позволяет хранить момент времени, который может быть представлен как календарная дата и время, с заданной суб-секундной точностью. 
-Размер тика/точность: 10-precision секунд, где precision - целочисленный параметр типа. +Размер тика (точность, precision): 10-precision секунд, где precision - целочисленный параметр. -Синтаксис: +**Синтаксис:** ``` sql DateTime64(precision, [timezone]) @@ -17,9 +17,11 @@ DateTime64(precision, [timezone]) Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (‘2020-01-01 05:00:01.000’). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md). -## Пример {#primer} +Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2238 г. -**1.** Создание таблицы с столбцом типа `DateTime64` и вставка данных в неё: +## Примеры {#examples} + +1. Создание таблицы со столбцом типа `DateTime64` и вставка данных в неё: ``` sql CREATE TABLE dt @@ -27,15 +29,15 @@ CREATE TABLE dt `timestamp` DateTime64(3, 'Europe/Moscow'), `event_id` UInt8 ) -ENGINE = TinyLog +ENGINE = TinyLog; ``` ``` sql -INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2) +INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2); ``` ``` sql -SELECT * FROM dt +SELECT * FROM dt; ``` ``` text @@ -46,12 +48,12 @@ SELECT * FROM dt ``` - При вставке даты-времени как числа (аналогично ‘Unix timestamp’), время трактуется как UTC. Unix timestamp `1546300800` в часовом поясе `Europe/London (UTC+0)` представляет время `'2019-01-01 00:00:00'`. Однако, столбец `timestamp` имеет тип `DateTime('Europe/Moscow (UTC+3)')`, так что при выводе в виде строки время отобразится как `2019-01-01 03:00:00`. -- При вставке даты-времени в виде строки, время трактуется соответственно часовому поясу установленному для колонки. `'2019-01-01 00:00:00'` трактуется как время по Москве (и в базу сохраняется `'2018-12-31 21:00:00'` в виде Unix Timestamp) +- При вставке даты-времени в виде строки, время трактуется соответственно часовому поясу установленному для колонки. `'2019-01-01 00:00:00'` трактуется как время по Москве (и в базу сохраняется `'2018-12-31 21:00:00'` в виде Unix Timestamp). -**2.** Фильтрация по значениям даты-времени +2. Фильтрация по значениям даты и времени ``` sql -SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') +SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow'); ``` ``` text @@ -60,12 +62,12 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europ └─────────────────────────┴──────────┘ ``` -В отличие от типа `DateTime`, `DateTime64` не конвертируется из строк автоматически +В отличие от типа `DateTime`, `DateTime64` не конвертируется из строк автоматически. -**3.** Получение часового пояса для значения типа `DateTime64`: +3. Получение часового пояса для значения типа `DateTime64`: ``` sql -SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x +SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x; ``` ``` text @@ -74,13 +76,13 @@ SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS └─────────────────────────┴────────────────────────────────┘ ``` -**4.** Конвертация часовых поясов +4. 
Конвертация часовых поясов ``` sql SELECT toDateTime64(timestamp, 3, 'Europe/London') as lon_time, toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time -FROM dt +FROM dt; ``` ``` text @@ -90,7 +92,7 @@ FROM dt └─────────────────────────┴─────────────────────────┘ ``` -## See Also {#see-also} +**See Also** - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) From fa498174219aba5c1ef79ef9f1d9fd2f6b97156b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 5 Apr 2021 01:01:20 +0300 Subject: [PATCH 068/152] Update HashMap.h --- src/Common/HashTable/HashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index 466943c698b..e8cb7636a07 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -22,7 +22,7 @@ struct PairNoInit First first; Second second; - PairNoInit() = default; + PairNoInit() {}; template PairNoInit(FirstValue && first_, NoInitTag) From e486fde35e37f6099aa73a57bfec150a139db850 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 5 Apr 2021 01:20:17 +0300 Subject: [PATCH 069/152] Update HashMap.h --- src/Common/HashTable/HashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index e8cb7636a07..50324fe64c7 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -22,7 +22,7 @@ struct PairNoInit First first; Second second; - PairNoInit() {}; + PairNoInit() {} template PairNoInit(FirstValue && first_, NoInitTag) From deb35c5f4ef780979899bf1969ad946d55624e23 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 5 Apr 2021 01:26:26 +0300 Subject: [PATCH 070/152] DirectDictionary dictGet multiple columns optimization --- src/Dictionaries/CacheDictionary.cpp | 6 +- src/Dictionaries/DictionaryHelpers.h | 20 +++++-- src/Dictionaries/DirectDictionary.cpp | 73 +++++++++++++++++-------- src/Dictionaries/DirectDictionary.h | 7 +++ tests/performance/direct_dictionary.xml | 12 ++++ 5 files changed, 88 insertions(+), 30 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 535e862af40..184626bddcc 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -133,7 +133,7 @@ ColumnPtr CacheDictionary::getColumn( template Columns CacheDictionary::getColumns( const Strings & attribute_names, - const DataTypes &, + const DataTypes & result_types, const Columns & key_columns, const DataTypes & key_types, const Columns & default_values_columns) const @@ -159,7 +159,7 @@ Columns CacheDictionary::getColumns( DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); auto keys = extractor.extractAllKeys(); - DictionaryStorageFetchRequest request(dict_struct, attribute_names, default_values_columns); + DictionaryStorageFetchRequest request(dict_struct, attribute_names, result_types, default_values_columns); FetchResult result_of_fetch_from_storage; @@ -277,7 +277,7 @@ ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & k const auto keys = extractor.extractAllKeys(); /// We make empty request just to fetch if keys exists - DictionaryStorageFetchRequest request(dict_struct, {}, {}); + DictionaryStorageFetchRequest request(dict_struct, {}, {}, {}); FetchResult result_of_fetch_from_storage; diff --git 
a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 3e7063bb9ef..33529a0208c 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -60,7 +60,11 @@ private: class DictionaryStorageFetchRequest { public: - DictionaryStorageFetchRequest(const DictionaryStructure & structure, const Strings & attributes_names_to_fetch, Columns attributes_default_values_columns) + DictionaryStorageFetchRequest( + const DictionaryStructure & structure, + const Strings & attributes_names_to_fetch, + DataTypes attributes_to_fetch_result_types, + Columns attributes_default_values_columns) : attributes_to_fetch_names_set(attributes_names_to_fetch.begin(), attributes_names_to_fetch.end()) , attributes_to_fetch_filter(structure.attributes.size(), false) { @@ -73,7 +77,7 @@ public: dictionary_attributes_types.reserve(attributes_size); attributes_default_value_providers.reserve(attributes_to_fetch_names_set.size()); - size_t default_values_column_index = 0; + size_t attributes_to_fetch_index = 0; for (size_t i = 0; i < attributes_size; ++i) { const auto & dictionary_attribute = structure.attributes[i]; @@ -84,8 +88,16 @@ public: if (attributes_to_fetch_names_set.find(name) != attributes_to_fetch_names_set.end()) { attributes_to_fetch_filter[i] = true; - attributes_default_value_providers.emplace_back(dictionary_attribute.null_value, attributes_default_values_columns[default_values_column_index]); - ++default_values_column_index; + auto & attribute_to_fetch_result_type = attributes_to_fetch_result_types[attributes_to_fetch_index]; + + if (!attribute_to_fetch_result_type->equals(*type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Attribute type does not match, expected ({}), found ({})", + attribute_to_fetch_result_type->getName(), + type->getName()); + + attributes_default_value_providers.emplace_back(dictionary_attribute.null_value, attributes_default_values_columns[attributes_to_fetch_index]); + ++attributes_to_fetch_index; } else attributes_default_value_providers.emplace_back(dictionary_attribute.null_value); diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 96ef259106a..93c382eba86 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -31,12 +31,12 @@ DirectDictionary::DirectDictionary( } template -ColumnPtr DirectDictionary::getColumn( - const std::string & attribute_name, - const DataTypePtr & result_type, - const Columns & key_columns, - const DataTypes & key_types [[maybe_unused]], - const ColumnPtr & default_values_column) const +Columns DirectDictionary::getColumns( + const Strings & attribute_names, + const DataTypes & result_types, + const Columns & key_columns, + const DataTypes & key_types, + const Columns & default_values_columns) const { if constexpr (dictionary_key_type == DictionaryKeyType::complex) dict_struct.validateKeyTypes(key_types); @@ -45,16 +45,14 @@ ColumnPtr DirectDictionary::getColumn( DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); const auto requested_keys = extractor.extractAllKeys(); - const DictionaryAttribute & attribute = dict_struct.getAttribute(attribute_name, result_type); - DefaultValueProvider default_value_provider(attribute.null_value, default_values_column); + DictionaryStorageFetchRequest request(dict_struct, attribute_names, result_types, default_values_columns); HashMap key_to_fetched_index; key_to_fetched_index.reserve(requested_keys.size()); - auto fetched_from_storage = 
attribute.type->createColumn(); + auto fetched_columns_from_storage = request.makeAttributesResultColumns(); size_t fetched_key_index = 0; - size_t requested_attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; Columns block_key_columns; size_t dictionary_keys_size = dict_struct.getKeysNames().size(); @@ -73,8 +71,14 @@ ColumnPtr DirectDictionary::getColumn( DictionaryKeysExtractor block_keys_extractor(block_key_columns, arena_holder.getComplexKeyArena()); auto block_keys = block_keys_extractor.extractAllKeys(); - const auto & block_column = block.safeGetByPosition(dictionary_keys_size + requested_attribute_index).column; - fetched_from_storage->insertRangeFrom(*block_column, 0, block_keys.size()); + for (size_t attribute_index = 0; attribute_index < request.attributesSize(); ++attribute_index) + { + if (!request.shouldFillResultColumnWithIndex(attribute_index)) + continue; + + const auto & block_column = block.safeGetByPosition(dictionary_keys_size + attribute_index).column; + fetched_columns_from_storage[attribute_index]->insertRangeFrom(*block_column, 0, block_keys.size()); + } for (size_t block_key_index = 0; block_key_index < block_keys.size(); ++block_key_index) { @@ -92,25 +96,48 @@ ColumnPtr DirectDictionary::getColumn( size_t requested_keys_size = requested_keys.size(); - auto result = fetched_from_storage->cloneEmpty(); - result->reserve(requested_keys_size); + auto result_columns = request.makeAttributesResultColumns(); - for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index) + for (size_t attribute_index = 0; attribute_index < result_columns.size(); ++attribute_index) { - const auto requested_key = requested_keys[requested_key_index]; - const auto * it = key_to_fetched_index.find(requested_key); + if (!request.shouldFillResultColumnWithIndex(attribute_index)) + continue; - if (it) - fetched_from_storage->get(it->getMapped(), value_to_insert); - else - value_to_insert = default_value_provider.getDefaultValue(requested_key_index); + auto & result_column = result_columns[attribute_index]; - result->insert(value_to_insert); + const auto & fetched_column_from_storage = fetched_columns_from_storage[attribute_index]; + const auto & default_value_provider = request.defaultValueProviderAtIndex(attribute_index); + + result_column->reserve(requested_keys_size); + + for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index) + { + const auto requested_key = requested_keys[requested_key_index]; + const auto * it = key_to_fetched_index.find(requested_key); + + if (it) + fetched_column_from_storage->get(it->getMapped(), value_to_insert); + else + value_to_insert = default_value_provider.getDefaultValue(requested_key_index); + + result_column->insert(value_to_insert); + } } query_count.fetch_add(requested_keys_size, std::memory_order_relaxed); - return result; + return request.filterRequestedColumns(result_columns); +} + +template +ColumnPtr DirectDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnPtr & default_values_column) const +{ + return getColumns({ attribute_name }, { result_type }, key_columns, key_types, { default_values_column }).front(); } template diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h index 6bca6ac6a18..e547e10433c 100644 --- a/src/Dictionaries/DirectDictionary.h +++ 
b/src/Dictionaries/DirectDictionary.h
@@ -66,6 +66,13 @@ public:

     DictionaryKeyType getKeyType() const override { return dictionary_key_type; }

+    Columns getColumns(
+        const Strings & attribute_names,
+        const DataTypes & result_types,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const Columns & default_values_columns) const override;
+
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml
index 97ecdfe3e95..dc7732ebb98 100644
--- a/tests/performance/direct_dictionary.xml
+++ b/tests/performance/direct_dictionary.xml
@@ -93,6 +93,12 @@
     LIMIT {elements_count}
     FORMAT Null;

+
+    SELECT dictGet('default.simple_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), number)
+    FROM system.numbers
+    LIMIT {elements_count}
+    FORMAT Null;
+
     SELECT dictHas('default.simple_key_direct_dictionary', number)
     FROM system.numbers
@@ -106,6 +112,12 @@
     LIMIT {elements_count}
     FORMAT Null;

+
+    SELECT dictGet('default.complex_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), (number, toString(number)))
+    FROM system.numbers
+    LIMIT {elements_count}
+    FORMAT Null;
+
     SELECT dictHas('default.complex_key_direct_dictionary', (number, toString(number)))
     FROM system.numbers

From d9cc03bc34e5c582b57c499480e2c02bf7dd3b4e Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Mon, 5 Apr 2021 11:29:05 +0300
Subject: [PATCH 071/152] Fixed tests

---
 src/Dictionaries/DirectDictionary.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 93c382eba86..bacb1a87dc9 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -35,7 +35,7 @@ Columns DirectDictionary::getColumns(
     const Strings & attribute_names,
     const DataTypes & result_types,
     const Columns & key_columns,
-    const DataTypes & key_types,
+    const DataTypes & key_types [[maybe_unused]],
     const Columns & default_values_columns) const
 {
     if constexpr (dictionary_key_type == DictionaryKeyType::complex)
@@ -126,7 +126,7 @@ Columns DirectDictionary::getColumns(

     query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);

-   return request.filterRequestedColumns(result_columns);
+    return request.filterRequestedColumns(result_columns);
 }

 template
@@ -141,7 +141,9 @@ ColumnPtr DirectDictionary::getColumn(
 }

 template
-ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types [[maybe_unused]]) const
+ColumnUInt8::Ptr DirectDictionary::hasKeys(
+    const Columns & key_columns,
+    const DataTypes & key_types [[maybe_unused]]) const
 {
     if constexpr (dictionary_key_type == DictionaryKeyType::complex)
         dict_struct.validateKeyTypes(key_types);

From 156ba8a7fc25575fdf612a973cc9e8d6b90d2a39 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 5 Apr 2021 12:16:38 +0300
Subject: [PATCH 072/152] Disable setting by default and ban for old-syntax
 MergeTree

---
 src/Storages/MergeTree/MergeTreeSettings.h | 2 +-
 src/Storages/StorageMergeTree.cpp          | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index aee25ee21a0..06d909eb912 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -54,7 +54,7 @@ struct Settings;
     M(UInt64, 
write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Amount of bytes, accumulated in WAL to do fsync.", 0) \
     M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \
     M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \
-    M(UInt64, non_replicated_deduplication_window, 100, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \
+    M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \
     \
     /** Inserts settings. */ \
     M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 5e192f76bc9..5f8032d7749 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -97,6 +97,9 @@ StorageMergeTree::StorageMergeTree(
     auto settings = getSettings();
     if (settings->non_replicated_deduplication_window != 0)
     {
+        if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
+            throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS);
+
         std::string path = getDataPaths()[0] + "/deduplication_logs";
         deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version);
         deduplication_log->load();

From bb48041fba046ab2fd184b28ece85e06eec52095 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Mon, 5 Apr 2021 13:53:58 +0300
Subject: [PATCH 073/152] once again

From 8422774b358b65cca8229dbf8e235715f07359ca Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Mon, 5 Apr 2021 18:22:28 +0300
Subject: [PATCH 074/152] boop the CI

From 27d4fbd13b7c6eeade74a84ab0813c2fd1962eaf Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 2 Apr 2021 07:09:39 +0300
Subject: [PATCH 075/152] Compare Block itself for distributed async INSERT
 batches

Asynchronous INSERT into Distributed (insert_distributed_sync=0) stores the
distributed batches on disk for sending in the background. But the types may
differ slightly between the Distributed table and its underlying table, so the
initiator needs to know whether conversion is required or not.

Before this patch those on-disk distributed batches contained a header with
dumpStructure() of the block in that batch; however, that compares more than
just names and types, and besides, dumpStructure() is a debug method.

So instead of storing a string representation of the block header we should
store an empty block in the file header (note that we cannot store the empty
block anywhere other than the header, since that would require reading all
blocks from the file, due to some trickery of the readers interface).
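Condensed, the decision on the sending side then becomes a structural
comparison instead of a string comparison (see the writeRemoteConvert()
changes below for the actual code):

    if (!blocksHaveEqualStructure(distributed_header.block_header, remote.getHeader()))
    {
        /// Names or types differ -- re-read the blocks and convert them
        /// column-by-column, matched by name, before sending.
        writeAndConvert(remote, in);
        return;
    }
    /// Structures match -- forward the compressed data as-is.
    CheckingCompressedReadBuffer checking_in(in);
    remote.writePrepared(checking_in);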
Note that this patch also contains a tiny refactoring:
- s/header/distributed_header/

v1: dumpNamesAndTypes()
v2: dump empty block into the batch itself
v3: move empty block into the header
---
 src/Storages/Distributed/DirectoryMonitor.cpp | 173 +++++++++++-------
 .../DistributedBlockOutputStream.cpp          |   8 +-
 ...ructure_mismatch_types_and_names.reference |   0
 ...ock_structure_mismatch_types_and_names.sql |  22 +++
 ..._INSERT_block_structure_mismatch.reference |   6 +
 ...91_dist_INSERT_block_structure_mismatch.sh |  30 +++
 .../queries/0_stateless/arcadia_skip_list.txt |   2 +
 7 files changed, 177 insertions(+), 64 deletions(-)
 create mode 100644 tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.reference
 create mode 100644 tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.sql
 create mode 100644 tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference
 create mode 100755 tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh

diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp
index fb5e5080314..7f4a8e06b75 100644
--- a/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -104,12 +104,14 @@ namespace
         size_t rows = 0;
         size_t bytes = 0;

-        std::string header;
+        /// dumpStructure() of the header -- obsolete
+        std::string block_header_string;
+        Block block_header;
     };

-    DistributedHeader readDistributedHeader(ReadBuffer & in, Poco::Logger * log)
+    DistributedHeader readDistributedHeader(ReadBufferFromFile & in, Poco::Logger * log)
     {
-        DistributedHeader header;
+        DistributedHeader distributed_header;

         UInt64 query_size;
         readVarUInt(query_size, in);
@@ -135,17 +137,25 @@ namespace
                 LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features.");
             }

-            readStringBinary(header.insert_query, header_buf);
-            header.insert_settings.read(header_buf);
+            readStringBinary(distributed_header.insert_query, header_buf);
+            distributed_header.insert_settings.read(header_buf);

             if (header_buf.hasPendingData())
-                header.client_info.read(header_buf, initiator_revision);
+                distributed_header.client_info.read(header_buf, initiator_revision);

             if (header_buf.hasPendingData())
             {
-                readVarUInt(header.rows, header_buf);
-                readVarUInt(header.bytes, header_buf);
-                readStringBinary(header.header, header_buf);
+                readVarUInt(distributed_header.rows, header_buf);
+                readVarUInt(distributed_header.bytes, header_buf);
+                readStringBinary(distributed_header.block_header_string, header_buf);
+            }
+
+            if (header_buf.hasPendingData())
+            {
+                NativeBlockInputStream header_block_in(header_buf, DBMS_TCP_PROTOCOL_VERSION);
+                distributed_header.block_header = header_block_in.read();
+                if (!distributed_header.block_header)
+                    throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read header from the {} batch", in.getFileName());
             }

             /// Add handling new data here, for example:
            ///
            /// And note that it is safe, because we have checksum and size for header.
-            return header;
+            return distributed_header;
         }

         if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT)
         {
-            header.insert_settings.read(in, SettingsWriteFormat::BINARY);
-            readStringBinary(header.insert_query, in);
-            return header;
+            distributed_header.insert_settings.read(in, SettingsWriteFormat::BINARY);
+            readStringBinary(distributed_header.insert_query, in);
+            return distributed_header;
         }

-        header.insert_query.resize(query_size);
-        in.readStrict(header.insert_query.data(), query_size);
+        distributed_header.insert_query.resize(query_size);
+        in.readStrict(distributed_header.insert_query.data(), query_size);

-        return header;
+        return distributed_header;
     }

     /// remote_error argument is used to decide whether some errors should be
@@ -200,35 +210,58 @@ namespace
         return nullptr;
     }

-    void writeRemoteConvert(const DistributedHeader & header, RemoteBlockOutputStream & remote, ReadBufferFromFile & in, Poco::Logger * log)
+    void writeAndConvert(RemoteBlockOutputStream & remote, ReadBufferFromFile & in)
     {
-        if (remote.getHeader() && header.header != remote.getHeader().dumpStructure())
+        CompressedReadBuffer decompressing_in(in);
+        NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
+        block_in.readPrefix();
+
+        while (Block block = block_in.read())
         {
-            LOG_WARNING(log,
-                "Structure does not match (remote: {}, local: {}), implicit conversion will be done",
-                remote.getHeader().dumpStructure(), header.header);
-
-            CompressedReadBuffer decompressing_in(in);
-            /// Lack of header, requires to read blocks
-            NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
-
-            block_in.readPrefix();
-            while (Block block = block_in.read())
-            {
-                ConvertingBlockInputStream convert(
-                    std::make_shared(block),
-                    remote.getHeader(),
-                    ConvertingBlockInputStream::MatchColumnsMode::Name);
-                auto adopted_block = convert.read();
-                remote.write(adopted_block);
-            }
-            block_in.readSuffix();
+            ConvertingBlockInputStream convert(
+                std::make_shared(block),
+                remote.getHeader(),
+                ConvertingBlockInputStream::MatchColumnsMode::Name);
+            auto adopted_block = convert.read();
+            remote.write(adopted_block);
         }
-        else
+
+        block_in.readSuffix();
+    }
+
+    void writeRemoteConvert(const DistributedHeader & distributed_header, RemoteBlockOutputStream & remote, ReadBufferFromFile & in, Poco::Logger * log)
+    {
+        if (!remote.getHeader())
         {
             CheckingCompressedReadBuffer checking_in(in);
             remote.writePrepared(checking_in);
+            return;
         }
+
+        /// This is the old format, which does not store the block header in the file header;
+        /// applying ConvertingBlockInputStream in this case is not a big overhead.
+        ///
+        /// In any case the header can only be obtained from the first block, which contains all rows anyway.
+ if (!distributed_header.block_header) + { + LOG_TRACE(log, "Processing batch {} with old format (no header)", in.getFileName()); + + writeAndConvert(remote, in); + return; + } + + if (!blocksHaveEqualStructure(distributed_header.block_header, remote.getHeader())) + { + LOG_WARNING(log, + "Structure does not match (remote: {}, local: {}), implicit conversion will be done", + remote.getHeader().dumpStructure(), distributed_header.block_header.dumpStructure()); + + writeAndConvert(remote, in); + return; + } + + CheckingCompressedReadBuffer checking_in(in); + remote.writePrepared(checking_in); } } @@ -498,13 +531,15 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; ReadBufferFromFile in(file_path); - const auto & header = readDistributedHeader(in, log); + const auto & distributed_header = readDistributedHeader(in, log); - auto connection = pool->get(timeouts, &header.insert_settings); + auto connection = pool->get(timeouts, &distributed_header.insert_settings); RemoteBlockOutputStream remote{*connection, timeouts, - header.insert_query, header.insert_settings, header.client_info}; + distributed_header.insert_query, + distributed_header.insert_settings, + distributed_header.client_info}; remote.writePrefix(); - writeRemoteConvert(header, remote, in, log); + writeRemoteConvert(distributed_header, remote, in, log); remote.writeSuffix(); } catch (const Exception & e) @@ -523,20 +558,21 @@ struct StorageDistributedDirectoryMonitor::BatchHeader Settings settings; String query; ClientInfo client_info; - String sample_block_structure; + Block header; - BatchHeader(Settings settings_, String query_, ClientInfo client_info_, String sample_block_structure_) + BatchHeader(Settings settings_, String query_, ClientInfo client_info_, Block header_) : settings(std::move(settings_)) , query(std::move(query_)) , client_info(std::move(client_info_)) - , sample_block_structure(std::move(sample_block_structure_)) + , header(std::move(header_)) { } bool operator==(const BatchHeader & other) const { - return std::tie(settings, query, client_info.query_kind, sample_block_structure) == - std::tie(other.settings, other.query, other.client_info.query_kind, other.sample_block_structure); + return std::tie(settings, query, client_info.query_kind) == + std::tie(other.settings, other.query, other.client_info.query_kind) && + blocksHaveEqualStructure(header, other.header); } struct Hash @@ -545,7 +581,7 @@ struct StorageDistributedDirectoryMonitor::BatchHeader { SipHash hash_state; hash_state.update(batch_header.query.data(), batch_header.query.size()); - hash_state.update(batch_header.sample_block_structure.data(), batch_header.sample_block_structure.size()); + batch_header.header.updateHash(hash_state); return hash_state.get64(); } }; @@ -632,16 +668,17 @@ struct StorageDistributedDirectoryMonitor::Batch } ReadBufferFromFile in(file_path->second); - const auto & header = readDistributedHeader(in, parent.log); + const auto & distributed_header = readDistributedHeader(in, parent.log); if (!remote) { remote = std::make_unique(*connection, timeouts, - header.insert_query, header.insert_settings, header.client_info); + distributed_header.insert_query, + distributed_header.insert_settings, + distributed_header.client_info); remote->writePrefix(); } - - writeRemoteConvert(header, *remote, in, parent.log); + writeRemoteConvert(distributed_header, *remote, in, parent.log); } if (remote) @@ -808,22 +845,27 @@ void 
StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map size_t total_rows = 0; size_t total_bytes = 0; - std::string sample_block_structure; - DistributedHeader header; + Block header; + DistributedHeader distributed_header; try { /// Determine metadata of the current file and check if it is not broken. ReadBufferFromFile in{file_path}; - header = readDistributedHeader(in, log); + distributed_header = readDistributedHeader(in, log); - if (header.rows) + if (distributed_header.rows) { - total_rows += header.rows; - total_bytes += header.bytes; - sample_block_structure = header.header; + total_rows += distributed_header.rows; + total_bytes += distributed_header.bytes; } - else + + if (distributed_header.block_header) + header = distributed_header.block_header; + + if (!total_rows || !header) { + LOG_TRACE(log, "Processing batch {} with old format (no header/rows)", in.getFileName()); + CompressedReadBuffer decompressing_in(in); NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); block_in.readPrefix(); @@ -833,8 +875,8 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map total_rows += block.rows(); total_bytes += block.bytes(); - if (sample_block_structure.empty()) - sample_block_structure = block.cloneEmpty().dumpStructure(); + if (!header) + header = block.cloneEmpty(); } block_in.readSuffix(); } @@ -850,7 +892,12 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map throw; } - BatchHeader batch_header(std::move(header.insert_settings), std::move(header.insert_query), std::move(header.client_info), std::move(sample_block_structure)); + BatchHeader batch_header( + std::move(distributed_header.insert_settings), + std::move(distributed_header.insert_query), + std::move(distributed_header.client_info), + std::move(header) + ); Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second; batch.file_indices.push_back(file_idx); diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index f8ba4221842..5fd778b8063 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -679,7 +679,13 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: context.getClientInfo().write(header_buf, DBMS_TCP_PROTOCOL_VERSION); writeVarUInt(block.rows(), header_buf); writeVarUInt(block.bytes(), header_buf); - writeStringBinary(block.cloneEmpty().dumpStructure(), header_buf); + writeStringBinary(block.cloneEmpty().dumpStructure(), header_buf); /// obsolete + /// Write block header separately in the batch header. + /// It is required for checking does conversion is required or not. 
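+    /// Serializing block.cloneEmpty() keeps only column names and types
+    /// (zero rows), which is all the receiving directory monitor needs to
+    /// decide whether conversion is required, without decompressing the
+    /// actual payload.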
+ { + NativeBlockOutputStream header_stream{header_buf, DBMS_TCP_PROTOCOL_VERSION, block.cloneEmpty()}; + header_stream.write(block.cloneEmpty()); + } /// Add new fields here, for example: /// writeVarUInt(my_new_data, header_buf); diff --git a/tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.reference b/tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.sql b/tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.sql new file mode 100644 index 00000000000..e921460ccfc --- /dev/null +++ b/tests/queries/0_stateless/01790_dist_INSERT_block_structure_mismatch_types_and_names.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS tmp_01781; +DROP TABLE IF EXISTS dist_01781; + +SET prefer_localhost_replica=0; + +CREATE TABLE tmp_01781 (n LowCardinality(String)) ENGINE=Memory; +CREATE TABLE dist_01781 (n LowCardinality(String)) Engine=Distributed(test_cluster_two_shards, currentDatabase(), tmp_01781, cityHash64(n)); + +SET insert_distributed_sync=1; +INSERT INTO dist_01781 VALUES ('1'),('2'); +-- different LowCardinality size +INSERT INTO dist_01781 SELECT * FROM numbers(1000); + +SET insert_distributed_sync=0; +SYSTEM STOP DISTRIBUTED SENDS dist_01781; +INSERT INTO dist_01781 VALUES ('1'),('2'); +-- different LowCardinality size +INSERT INTO dist_01781 SELECT * FROM numbers(1000); +SYSTEM FLUSH DISTRIBUTED dist_01781; + +DROP TABLE tmp_01781; +DROP TABLE dist_01781; diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference new file mode 100644 index 00000000000..3bba1ac23c0 --- /dev/null +++ b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference @@ -0,0 +1,6 @@ + DistributedBlockOutputStream: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. + DistributedBlockOutputStream: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. +1 +1 +2 +2 diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh new file mode 100755 index 00000000000..e989696da03 --- /dev/null +++ b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# NOTE: this is a partial copy of the 01683_dist_INSERT_block_structure_mismatch, +# but this test also checks the log messages + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 -nm -q " + DROP TABLE IF EXISTS tmp_01683; + DROP TABLE IF EXISTS dist_01683; + + CREATE TABLE tmp_01683 (n Int8) ENGINE=Memory; + CREATE TABLE dist_01683 (n UInt64) Engine=Distributed(test_cluster_two_shards, currentDatabase(), tmp_01683, n); + + SET insert_distributed_sync=1; + INSERT INTO dist_01683 VALUES (1),(2); + + SET insert_distributed_sync=0; + INSERT INTO dist_01683 VALUES (1),(2); + SYSTEM FLUSH DISTRIBUTED dist_01683; + + -- TODO: cover distributed_directory_monitor_batch_inserts=1 + + SELECT * FROM tmp_01683 ORDER BY n; + + DROP TABLE tmp_01683; + DROP TABLE dist_01683; +" |& sed 's/^.* Date: Tue, 6 Apr 2021 10:16:36 +0300 Subject: [PATCH 076/152] Fix grammar in docs --- docs/en/sql-reference/statements/system.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 2348a2a2668..919bd65d56b 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -169,7 +169,7 @@ SYSTEM START MERGES [ON VOLUME | [db.]merge_tree_family_table_name ### STOP TTL MERGES {#query_language-stop-ttl-merges} Provides possibility to stop background delete old data according to [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn’t exists or table have not MergeTree engine. Return error when database doesn’t exists: +Returns `Ok.` even if table doesn’t exist or table has not MergeTree engine. Returns error when database doesn’t exist: ``` sql SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] @@ -178,7 +178,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] ### START TTL MERGES {#query_language-start-ttl-merges} Provides possibility to start background delete old data according to [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn’t exists. Return error when database doesn’t exists: +Returns `Ok.` even if table doesn’t exist. Returns error when database doesn’t exist: ``` sql SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] @@ -187,7 +187,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] ### STOP MOVES {#query_language-stop-moves} Provides possibility to stop background move data according to [TTL table expression with TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn’t exists. Return error when database doesn’t exists: +Returns `Ok.` even if table doesn’t exist. Returns error when database doesn’t exist: ``` sql SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] @@ -196,7 +196,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ### START MOVES {#query_language-start-moves} Provides possibility to start background move data according to [TTL table expression with TO VOLUME and TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn’t exists. Return error when database doesn’t exists: +Returns `Ok.` even if table doesn’t exist. 
Returns error when database doesn’t exist: ``` sql SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] @@ -209,7 +209,7 @@ ClickHouse can manage background replication related processes in [ReplicatedMer ### STOP FETCHES {#query_language-system-stop-fetches} Provides possibility to stop background fetches for inserted parts for tables in the `ReplicatedMergeTree` family: -Always returns `Ok.` regardless of the table engine and even table or database doesn’t exists. +Always returns `Ok.` regardless of the table engine and even if table or database doesn’t exist. ``` sql SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name] @@ -218,7 +218,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name] ### START FETCHES {#query_language-system-start-fetches} Provides possibility to start background fetches for inserted parts for tables in the `ReplicatedMergeTree` family: -Always returns `Ok.` regardless of the table engine and even table or database doesn’t exists. +Always returns `Ok.` regardless of the table engine and even if table or database doesn’t exist. ``` sql SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name] @@ -264,7 +264,7 @@ Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name ``` -After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from +After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands. @@ -280,4 +280,3 @@ SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name ### RESTART REPLICAS {#query_language-system-restart-replicas} Provides possibility to reinitialize Zookeeper sessions state for all `ReplicatedMergeTree` tables, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed - From 0cbfa5d125f808bf319264edc8221ae49ebff506 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 6 Apr 2021 11:19:28 +0300 Subject: [PATCH 077/152] Updated test --- tests/performance/direct_dictionary.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml index dc7732ebb98..e827ea0a76f 100644 --- a/tests/performance/direct_dictionary.xml +++ b/tests/performance/direct_dictionary.xml @@ -113,7 +113,7 @@ FORMAT Null; - SELECT dictGet('default.simple_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), (number, toString(number))) + SELECT dictGet('default.complex_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), (number, toString(number))) FROM system.numbers LIMIT {elements_count} FORMAT Null; From 735154c81aeb6e5fe86e718cb2cbec5918d53b7c Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Apr 2021 12:29:29 +0300 Subject: [PATCH 078/152] Handle not plain where tree in StorageMerge modifySelect --- src/Interpreters/CrossToInnerJoinVisitor.cpp | 50 ------------------- src/Interpreters/IdentifierSemantic.cpp | 20 ++++++++ src/Interpreters/IdentifierSemantic.h | 3 ++ src/Storages/StorageMerge.cpp | 36 ++++++------- ..._merge_engine_join_key_condition.reference | 1 + .../01783_merge_engine_join_key_condition.sql | 1 + 6 files changed, 39 insertions(+), 72 deletions(-) diff --git 
a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index c8195706f04..b6f977cd9b5 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -81,56 +81,6 @@ private: ASTTableJoin * join = nullptr; }; -/// Collect all identifiers from ast -class IdentifiersCollector -{ -public: - using ASTIdentPtr = const ASTIdentifier *; - using ASTIdentifiers = std::vector; - struct Data - { - ASTIdentifiers idents; - }; - - static void visit(const ASTPtr & node, Data & data) - { - if (const auto * ident = node->as()) - data.idents.push_back(ident); - } - - static bool needChildVisit(const ASTPtr &, const ASTPtr &) - { - return true; - } - - static ASTIdentifiers collect(const ASTPtr & node) - { - IdentifiersCollector::Data ident_data; - ConstInDepthNodeVisitor ident_visitor(ident_data); - ident_visitor.visit(node); - return ident_data.idents; - } -}; - -/// Split expression `expr_1 AND expr_2 AND ... AND expr_n` into vector `[expr_1, expr_2, ..., expr_n]` -void collectConjunctions(const ASTPtr & node, std::vector & members) -{ - if (const auto * func = node->as(); func && func->name == NameAnd::name) - { - for (const auto & child : func->arguments->children) - collectConjunctions(child, members); - return; - } - members.push_back(node); -} - -std::vector collectConjunctions(const ASTPtr & node) -{ - std::vector members; - collectConjunctions(node, members); - return members; -} - bool isAllowedToRewriteCrossJoin(const ASTPtr & node, const Aliases & aliases) { if (node->as()) diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index ad5598afb5b..ff1fbbc8e2d 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { @@ -313,4 +315,22 @@ std::optional IdentifierMembershipCollector::getIdentsMembership(ASTPtr return IdentifierSemantic::getIdentsMembership(ast, tables, aliases); } +static void collectConjunctions(const ASTPtr & node, std::vector & members) +{ + if (const auto * func = node->as(); func && func->name == "and") + { + for (const auto & child : func->arguments->children) + collectConjunctions(child, members); + return; + } + members.push_back(node); +} + +std::vector collectConjunctions(const ASTPtr & node) +{ + std::vector members; + collectConjunctions(node, members); + return members; +} + } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 3a99150b792..9f11d8bdb9d 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -107,4 +107,7 @@ private: Aliases aliases; }; +/// Split expression `expr_1 AND expr_2 AND ... 
AND expr_n` into vector `[expr_1, expr_2, ..., expr_n]` +std::vector collectConjunctions(const ASTPtr & node); + } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index df176bd3bcf..7730ef98c93 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -71,31 +71,23 @@ TreeRewriterResult modifySelect(ASTSelectQuery & select, const TreeRewriterResul return; const size_t left_table_pos = 0; - if (const auto * conjunctions = where->as(); conjunctions && conjunctions->name == "and") + /// Test each argument of `and` function and select ones related to only left table + std::shared_ptr new_conj = makeASTFunction("and"); + for (const auto & node : collectConjunctions(where)) { - /// Test each argument of `and` function and select related to only left table - std::shared_ptr new_conj = makeASTFunction("and"); - for (const auto & node : conjunctions->arguments->children) - { - if (membership_collector.getIdentsMembership(node) == left_table_pos) - new_conj->arguments->children.push_back(std::move(node)); - } - if (new_conj->arguments->children.empty()) - /// No identifiers from left table - query.setExpression(expr, {}); - else if (new_conj->arguments->children.size() == 1) - /// Only one expression, lift from `and` - query.setExpression(expr, std::move(new_conj->arguments->children[0])); - else - /// Set new expression - query.setExpression(expr, std::move(new_conj)); + if (membership_collector.getIdentsMembership(node) == left_table_pos) + new_conj->arguments->children.push_back(std::move(node)); } + + if (new_conj->arguments->children.empty()) + /// No identifiers from left table + query.setExpression(expr, {}); + else if (new_conj->arguments->children.size() == 1) + /// Only one expression, lift from `and` + query.setExpression(expr, std::move(new_conj->arguments->children[0])); else - { - /// Remove whole expression if not match to left table - if (membership_collector.getIdentsMembership(where) != left_table_pos) - query.setExpression(expr, {}); - } + /// Set new expression + query.setExpression(expr, std::move(new_conj)); }; replace_where(select,ASTSelectQuery::Expression::WHERE); replace_where(select,ASTSelectQuery::Expression::PREWHERE); diff --git a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference index 9f7c2e7ee16..4068a6e00dd 100644 --- a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference +++ b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.reference @@ -2,3 +2,4 @@ 1 4 1 4 1 4 +1 4 diff --git a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql index 97a5f2f0ef7..115ee42fe11 100644 --- a/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql +++ b/tests/queries/0_stateless/01783_merge_engine_join_key_condition.sql @@ -14,6 +14,7 @@ SET force_primary_key = 1; SELECT * FROM foo_merge WHERE Val = 3 AND Id = 3; SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 AND t2.X == 4 GROUP BY X; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND (Id = 3 AND t2.X == 4) GROUP BY X; SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; SELECT count(), X FROM (SELECT * FROM foo_merge) f JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; From 6456a1507c3bf5170926a7a701d65c17a640ee89 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 
Apr 2021 13:14:44 +0300 Subject: [PATCH 079/152] Support alter setting --- .../MergeTree/MergeTreeDeduplicationLog.cpp | 86 +++++++++++++++---- .../MergeTree/MergeTreeDeduplicationLog.h | 18 +++- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/StorageMergeTree.cpp | 37 +++++--- src/Storages/StorageMergeTree.h | 4 + .../01781_merge_tree_deduplication.reference | 35 ++++++++ .../01781_merge_tree_deduplication.sql | 82 ++++++++++++++++++ 7 files changed, 231 insertions(+), 32 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 9cbffe977c4..0022a40eca3 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -80,13 +80,17 @@ MergeTreeDeduplicationLog::MergeTreeDeduplicationLog( , rotate_interval(deduplication_window_ * 2) /// actually it doesn't matter , format_version(format_version_) , deduplication_map(deduplication_window) -{} +{ + namespace fs = std::filesystem; + if (deduplication_window != 0 && !fs::exists(logs_dir)) + fs::create_directories(logs_dir); +} void MergeTreeDeduplicationLog::load() { namespace fs = std::filesystem; if (!fs::exists(logs_dir)) - fs::create_directories(logs_dir); + return; for (const auto & p : fs::directory_iterator(logs_dir)) { @@ -95,26 +99,33 @@ void MergeTreeDeduplicationLog::load() existing_logs[log_number] = {path, 0}; } - /// Order important, we load history from the begging to the end - for (auto & [log_number, desc] : existing_logs) + /// We should know which logs are exist even in case + /// of deduplication_window = 0 + if (!existing_logs.empty()) + current_log_number = existing_logs.rbegin()->first; + + if (deduplication_window != 0) { - try + /// Order important, we load history from the begging to the end + for (auto & [log_number, desc] : existing_logs) { - desc.entries_count = loadSingleLog(desc.path); - current_log_number = log_number; - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, "Error while loading MergeTree deduplication log on path " + desc.path); + try + { + desc.entries_count = loadSingleLog(desc.path); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__, "Error while loading MergeTree deduplication log on path " + desc.path); + } } + + /// Start new log, drop previous + rotateAndDropIfNeeded(); + + /// Can happen in case we have unfinished log + if (!current_writer) + current_writer = std::make_unique(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); } - - /// Start new log, drop previous - rotateAndDropIfNeeded(); - - /// Can happen in case we have unfinished log - if (!current_writer) - current_writer = std::make_unique(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); } size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) @@ -137,6 +148,10 @@ size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path) void MergeTreeDeduplicationLog::rotate() { + /// We don't deduplicate anything so we don't need any writers + if (deduplication_window == 0) + return; + current_log_number++; auto new_path = getLogPath(logs_dir, current_log_number); MergeTreeDeduplicationLogNameDescription log_description{new_path, 0}; @@ -169,7 +184,7 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs() /// If we found some logs to drop if (remove_from_value != 0) { - /// Go from beginning to the end and drop all outdated logs + /// Go from the beginning to the end and drop all outdated logs for (auto itr = existing_logs.begin(); itr != existing_logs.end();) { size_t number = itr->first; @@ -196,6 +211,13 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: { std::lock_guard lock(state_mutex); + /// We support zero case because user may want to disable deduplication with + /// ALTER MODIFY SETTING query. It's much more simplier to handle zero case + /// here then destroy whole object, check for null pointer from different + /// threads and so on. + if (deduplication_window == 0) + return std::make_pair(part_info, true); + /// If we already have this block let's deduplicate it if (deduplication_map.contains(block_id)) { @@ -226,6 +248,13 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf { std::lock_guard lock(state_mutex); + /// We support zero case because user may want to disable deduplication with + /// ALTER MODIFY SETTING query. It's much more simplier to handle zero case + /// here then destroy whole object, check for null pointer from different + /// threads and so on. 
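+    /// With a zero window the deduplication map is guaranteed to be empty and
+    /// no log writer is kept open, so dropping a part has nothing to erase or
+    /// record here (hence the early return before the writer assert below).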
+ if (deduplication_window == 0) + return; + assert(current_writer != nullptr); for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) @@ -260,4 +289,23 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf } } +void MergeTreeDeduplicationLog::setDeduplicationWindowSize(size_t deduplication_window_) +{ + std::lock_guard lock(state_mutex); + + deduplication_window = deduplication_window_; + rotate_interval = deduplication_window * 2; + + /// If settings was set for the first time with ALTER MODIFY SETTING query + if (deduplication_window != 0 && !std::filesystem::exists(logs_dir)) + std::filesystem::create_directories(logs_dir); + + deduplication_map.setMaxSize(deduplication_window); + rotateAndDropIfNeeded(); + + /// Can happen in case we have unfinished log + if (!current_writer) + current_writer = std::make_unique(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); +} + } diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h index 643b2ef9fad..281a76050a2 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.h +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.h @@ -39,7 +39,7 @@ private: Queue queue; IndexMap map; - const size_t max_size; + size_t max_size; public: using iterator = typename Queue::iterator; using const_iterator = typename Queue::const_iterator; @@ -65,6 +65,16 @@ public: return queue.size(); } + void setMaxSize(size_t max_size_) + { + max_size = max_size_; + while (size() > max_size) + { + map.erase(queue.front().key); + queue.pop_front(); + } + } + bool erase(const std::string & key) { auto it = map.find(key); @@ -139,14 +149,16 @@ public: /// Load history from disk. Ignores broken logs. void load(); + + void setDeduplicationWindowSize(size_t deduplication_window_); private: const std::string logs_dir; /// Size of deduplication window - const size_t deduplication_window; + size_t deduplication_window; /// How often we create new logs. Not very important, /// default value equals deduplication_window * 2 - const size_t rotate_interval; + size_t rotate_interval; const MergeTreeDataFormatVersion format_version; /// Current log number. Always growing number. 
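The setMaxSize() added above evicts from the front of the queue, so when ALTER MODIFY SETTING shrinks non_replicated_deduplication_window at runtime, only the most recently inserted block ids keep deduplicating. A minimal standalone C++ sketch of that bounded FIFO map follows; it is an illustrative model only (the class name BoundedDedupMap is assumed, it stores bare keys, and it omits the part info, persistence, and log rotation of the real class):

#include <cassert>
#include <iterator>
#include <list>
#include <string>
#include <unordered_map>

/// Simplified model of the bounded FIFO map used by the deduplication log:
/// insertion order is tracked in a list (oldest at the front), lookup goes
/// through a hash map, and shrinking the window evicts the oldest keys first.
class BoundedDedupMap
{
    std::list<std::string> queue;   /// oldest entry at the front
    std::unordered_map<std::string, std::list<std::string>::iterator> map;
    size_t max_size;

public:
    explicit BoundedDedupMap(size_t max_size_) : max_size(max_size_) {}

    /// Returns false if the key was already present (i.e. a duplicate block).
    bool insert(const std::string & key)
    {
        if (map.count(key))
            return false;
        queue.push_back(key);
        map[key] = std::prev(queue.end());
        while (queue.size() > max_size)
        {
            map.erase(queue.front());
            queue.pop_front();
        }
        return true;
    }

    /// Shrinking drops the oldest keys, mirroring setMaxSize() above.
    void setMaxSize(size_t new_max)
    {
        max_size = new_max;
        while (queue.size() > max_size)
        {
            map.erase(queue.front());
            queue.pop_front();
        }
    }

    size_t size() const { return queue.size(); }
};

int main()
{
    BoundedDedupMap dedup(3);
    assert(dedup.insert("block_1"));
    assert(!dedup.insert("block_1"));   /// duplicate is rejected
    dedup.insert("block_2");
    dedup.insert("block_3");
    dedup.setMaxSize(1);                /// window reduced via ALTER
    assert(dedup.size() == 1);          /// only the newest key survives
    assert(dedup.insert("block_1"));    /// the oldest key was evicted
}

Evicting oldest-first matters here because the window exists to catch retries of recent inserts; dropping the newest entries instead would defeat that purpose.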
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 06d909eb912..f422f00f4dc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -2,6 +2,7 @@ #include #include +#include namespace Poco::Util diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5f8032d7749..eeb8df4d329 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -94,16 +94,7 @@ StorageMergeTree::StorageMergeTree( loadMutations(); - auto settings = getSettings(); - if (settings->non_replicated_deduplication_window != 0) - { - if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); - - std::string path = getDataPaths()[0] + "/deduplication_logs"; - deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version); - deduplication_log->load(); - } + loadDeduplicationLog(); } @@ -276,6 +267,7 @@ void StorageMergeTree::alter( TableLockHolder & table_lock_holder) { auto table_id = getStorageID(); + auto old_storage_settings = getSettings(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); @@ -310,6 +302,21 @@ void StorageMergeTree::alter( if (!maybe_mutation_commands.empty()) waitForMutation(mutation_version, mutation_file_name); } + + { + /// Some additional changes in settings + auto new_storage_settings = getSettings(); + + if (old_storage_settings->non_replicated_deduplication_window != new_storage_settings->non_replicated_deduplication_window) + { + /// We cannot place this check into settings sanityCheck because it depends on format_version. + /// sanityCheck must work event without storage. + if (new_storage_settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + + deduplication_log->setDeduplicationWindowSize(new_storage_settings->non_replicated_deduplication_window); + } + } } @@ -625,6 +632,16 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) return CancellationCode::CancelSent; } +void StorageMergeTree::loadDeduplicationLog() +{ + auto settings = getSettings(); + if (settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + + std::string path = getDataPaths()[0] + "/deduplication_logs"; + deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version); + deduplication_log->load(); +} void StorageMergeTree::loadMutations() { diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 3ac7b8a0270..1ae21608190 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -134,6 +134,10 @@ private: void loadMutations(); + /// Load and initialize deduplication logs. Even if deduplication setting + /// equals zero creates object with deduplication window equals zero. + void loadDeduplicationLog(); + /** Determines what parts should be merged and merges it. 
* If aggressive - when selects parts don't takes into account their ratio size and novelty (used for OPTIMIZE query). * Returns true if merge is finished successfully. diff --git a/tests/queries/0_stateless/01781_merge_tree_deduplication.reference b/tests/queries/0_stateless/01781_merge_tree_deduplication.reference index 402a8919da5..cb5a3f1ff52 100644 --- a/tests/queries/0_stateless/01781_merge_tree_deduplication.reference +++ b/tests/queries/0_stateless/01781_merge_tree_deduplication.reference @@ -48,3 +48,38 @@ 88 11 11 77 11 11 77 12 12 +=============== +1 1 33 +1 1 33 +2 2 33 +3 3 33 +=============== +1 1 33 +1 1 33 +1 1 33 +1 1 33 +2 2 33 +3 3 33 +=============== +1 1 33 +1 1 33 +1 1 33 +1 1 33 +1 1 33 +2 2 33 +3 3 33 +=============== +1 1 44 +2 2 44 +3 3 44 +4 4 44 +=============== +1 1 +1 1 +=============== +1 1 +1 1 +1 1 +2 2 +3 3 +4 4 diff --git a/tests/queries/0_stateless/01781_merge_tree_deduplication.sql b/tests/queries/0_stateless/01781_merge_tree_deduplication.sql index 7e4b6f7db2b..236f7b35b80 100644 --- a/tests/queries/0_stateless/01781_merge_tree_deduplication.sql +++ b/tests/queries/0_stateless/01781_merge_tree_deduplication.sql @@ -102,4 +102,86 @@ INSERT INTO merge_tree_deduplication (key, value) VALUES (12, '12'); -- not dedu SELECT part, key, value FROM merge_tree_deduplication ORDER BY key; +-- Alters.... + +ALTER TABLE merge_tree_deduplication MODIFY SETTING non_replicated_deduplication_window = 2; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 33); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (2, '2', 33); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (3, '3', 33); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 33); + +SELECT * FROM merge_tree_deduplication WHERE part = 33 ORDER BY key; + +SELECT '==============='; + +ALTER TABLE merge_tree_deduplication MODIFY SETTING non_replicated_deduplication_window = 0; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 33); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 33); + +DETACH TABLE merge_tree_deduplication; +ATTACH TABLE merge_tree_deduplication; + +SELECT * FROM merge_tree_deduplication WHERE part = 33 ORDER BY key; + +SELECT '==============='; + +ALTER TABLE merge_tree_deduplication MODIFY SETTING non_replicated_deduplication_window = 3; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 33); + +SELECT * FROM merge_tree_deduplication WHERE part = 33 ORDER BY key; + +SELECT '==============='; + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 44); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (2, '2', 44); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (3, '3', 44); +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (1, '1', 44); + +INSERT INTO merge_tree_deduplication (key, value, part) VALUES (4, '4', 44); + +DETACH TABLE merge_tree_deduplication; +ATTACH TABLE merge_tree_deduplication; + +SELECT * FROM merge_tree_deduplication WHERE part = 44 ORDER BY key; + DROP TABLE IF EXISTS merge_tree_deduplication; + +SELECT '==============='; + +DROP TABLE IF EXISTS merge_tree_no_deduplication; + +CREATE TABLE merge_tree_no_deduplication +( + key UInt64, + value String +) +ENGINE=MergeTree() +ORDER BY key; + +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (1, '1'); +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (1, '1'); + +SELECT * 
FROM merge_tree_no_deduplication ORDER BY key; + +SELECT '==============='; + +ALTER TABLE merge_tree_no_deduplication MODIFY SETTING non_replicated_deduplication_window = 3; + +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (1, '1'); +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (2, '2'); +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (3, '3'); + +DETACH TABLE merge_tree_no_deduplication; +ATTACH TABLE merge_tree_no_deduplication; + +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (1, '1'); +INSERT INTO merge_tree_no_deduplication (key, value) VALUES (4, '4'); + +SELECT * FROM merge_tree_no_deduplication ORDER BY key; + +DROP TABLE IF EXISTS merge_tree_no_deduplication; From 53a7836a9b2ea10c49421af9f91e7b5967305b3e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 13:16:34 +0300 Subject: [PATCH 080/152] Fix typos --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 0022a40eca3..33960e2e1ff 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -212,7 +212,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: std::lock_guard lock(state_mutex); /// We support zero case because user may want to disable deduplication with - /// ALTER MODIFY SETTING query. It's much more simplier to handle zero case + /// ALTER MODIFY SETTING query. It's much more simpler to handle zero case /// here then destroy whole object, check for null pointer from different /// threads and so on. if (deduplication_window == 0) @@ -249,7 +249,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf std::lock_guard lock(state_mutex); /// We support zero case because user may want to disable deduplication with - /// ALTER MODIFY SETTING query. It's much more simplier to handle zero case + /// ALTER MODIFY SETTING query. It's much more simpler to handle zero case /// here then destroy whole object, check for null pointer from different /// threads and so on. if (deduplication_window == 0) From af19f892732960efc2f2f1b51ae2eeb8fc2632cf Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 6 Apr 2021 13:41:48 +0300 Subject: [PATCH 081/152] MemoryStorage sync comments and code --- src/Storages/StorageMemory.cpp | 6 +----- src/Storages/StorageMemory.h | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index d98cd4212e9..5e4185e280b 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -26,10 +26,6 @@ class MemorySource : public SourceWithProgress { using InitializerFunc = std::function &)>; public: - /// Blocks are stored in std::list which may be appended in another thread. - /// We use pointer to the beginning of the list and its current size. 
- /// We don't need synchronisation in this reader, because while we hold SharedLock on storage, - /// only new elements can be added to the back of the list, so our iterators remain valid MemorySource( Names column_names_, @@ -59,7 +55,7 @@ protected: size_t current_index = getAndIncrementExecutionIndex(); - if (current_index >= data->size()) + if (!data || current_index >= data->size()) { return {}; } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index b7fa4d7b222..bf009c40553 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -97,7 +97,7 @@ public: void delayReadForGlobalSubqueries() { delay_read_for_global_subqueries = true; } private: - /// MultiVersion data storage, so that we can copy the list of blocks to readers. + /// MultiVersion data storage, so that we can copy the vector of blocks to readers. MultiVersion data; From a9a01df74ab9fd5e14edce90afeb836b6f4aa67b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Apr 2021 14:38:35 +0300 Subject: [PATCH 082/152] add test --- tests/queries/0_stateless/01787_map_remote.reference | 2 ++ tests/queries/0_stateless/01787_map_remote.sql | 1 + 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/01787_map_remote.reference create mode 100644 tests/queries/0_stateless/01787_map_remote.sql diff --git a/tests/queries/0_stateless/01787_map_remote.reference b/tests/queries/0_stateless/01787_map_remote.reference new file mode 100644 index 00000000000..1c488d4418e --- /dev/null +++ b/tests/queries/0_stateless/01787_map_remote.reference @@ -0,0 +1,2 @@ +{'a':1,'b':2} +{'a':1,'b':2} diff --git a/tests/queries/0_stateless/01787_map_remote.sql b/tests/queries/0_stateless/01787_map_remote.sql new file mode 100644 index 00000000000..854eafa0a50 --- /dev/null +++ b/tests/queries/0_stateless/01787_map_remote.sql @@ -0,0 +1 @@ +SELECT map('a', 1, 'b', 2) FROM remote('127.0.0.{1,2}', system, one); \ No newline at end of file From 06e7a9f8f24d3eab1ecbaaf2a2491c703d6083b1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 6 Apr 2021 14:43:27 +0300 Subject: [PATCH 083/152] fix window functions with multiple input streams and no sorting --- programs/client/QueryFuzzer.cpp | 29 ++++++++++----------- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Parsers/ExpressionElementParsers.cpp | 12 ++++----- src/Processors/QueryPlan/WindowStep.cpp | 5 ++++ 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index 0c8dc0731f9..6243e2c82ec 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -37,34 +37,33 @@ namespace ErrorCodes Field QueryFuzzer::getRandomField(int type) { + static constexpr Int64 bad_int64_values[] + = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024, + 1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024, + 1024 * 1024 + 1, INT_MIN - 1ll, INT_MIN, INT_MIN + 1, + INT_MAX - 1, INT_MAX, INT_MAX + 1ll, INT64_MIN, INT64_MIN + 1, + INT64_MAX - 1, INT64_MAX}; switch (type) { case 0: { - static constexpr Int64 values[] - = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024, - 1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024, - 1024 * 1024 + 1, INT64_MIN, INT64_MAX}; - return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) + / sizeof(*bad_int64_values))]; } case 1: { static constexpr float values[] - = {NAN, INFINITY, -INFINITY, 0., 0.0001, 0.5, 0.9999, 
- 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001}; - return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, + 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; } case 2: { - static constexpr Int64 values[] - = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024, - 1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024, - 1024 * 1024 + 1, INT64_MIN, INT64_MAX}; static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - values[fuzz_rand() % (sizeof(values) / sizeof(*values))], - scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))] - ); + bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) + / sizeof(*bad_int64_values))], + scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]); } default: assert(false); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b4f64528471..5ddb250c9a2 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1624,6 +1624,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && !query.limitBy() && query.limitLength() && !query_analyzer->hasAggregation() + && !query_analyzer->hasWindow() && limit_length <= std::numeric_limits::max() - limit_offset && limit_length + limit_offset < max_block_size) { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a54573432a1..30c5ce4fb68 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -592,11 +592,11 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p // We can easily get a UINT64_MAX here, which doesn't even fit into // int64_t. Not sure what checks we are going to need here after we // support floats and dates. - if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN) + if (node->frame.begin_offset > INT_MAX) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset must be between {} and {}, but {} is given", - INT_MAX, INT_MIN, node->frame.begin_offset); + "Frame offset must be less than {}, but {} is given", + INT_MAX, node->frame.begin_offset); } if (node->frame.begin_offset < 0) @@ -661,11 +661,11 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p node->frame.end_offset = value.get(); node->frame.end_type = WindowFrame::BoundaryType::Offset; - if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN) + if (node->frame.end_offset > INT_MAX) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset must be between {} and {}, but {} is given", - INT_MAX, INT_MIN, node->frame.end_offset); + "Frame offset must be less than {}, but {} is given", + INT_MAX, node->frame.end_offset); } if (node->frame.end_offset < 0) diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 76191eba51a..66c329acb4b 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -64,6 +64,11 @@ WindowStep::WindowStep(const DataStream & input_stream_, void WindowStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) { + // This resize is needed for cases such as `over ()` when we don't have a + // sort node, and the input might have multiple streams. 
The sort node would + // have resized it. + pipeline.resize(1); + pipeline.addSimpleTransform([&](const Block & /*header*/) { return std::make_shared(input_header, From 4497b00ff58dc2b1eb605f2997b2cd4433fea62c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 6 Apr 2021 14:57:32 +0300 Subject: [PATCH 084/152] the test --- .../0_stateless/01568_window_functions_distributed.reference | 5 +++++ .../0_stateless/01568_window_functions_distributed.sql | 4 ++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/01568_window_functions_distributed.reference create mode 100644 tests/queries/0_stateless/01568_window_functions_distributed.sql diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference new file mode 100644 index 00000000000..b441189303d --- /dev/null +++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference @@ -0,0 +1,5 @@ +-- { echo } +set allow_experimental_window_functions = 1; +select row_number() over (order by dummy) from (select * from remote('127.0.0.{1,2}', system, one)); +1 +2 diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql new file mode 100644 index 00000000000..754b996e00c --- /dev/null +++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql @@ -0,0 +1,4 @@ +-- { echo } +set allow_experimental_window_functions = 1; + +select row_number() over (order by dummy) from (select * from remote('127.0.0.{1,2}', system, one)); From a40209e84d078ab6a26e7366f0513afba3803978 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 15:25:15 +0300 Subject: [PATCH 085/152] Remove strange fsync on coordination logs rotation --- src/Coordination/Changelog.cpp | 4 ---- src/Coordination/KeeperServer.cpp | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 227192fdf8a..871a7f8734a 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -357,10 +357,6 @@ void Changelog::readChangelogAndInitWriter(size_t last_commited_log_index, size_ void Changelog::rotate(size_t new_start_log_index) { - //// doesn't exist on init - if (current_writer) - current_writer->flush(); - ChangelogFileDescription new_description; new_description.prefix = DEFAULT_PREFIX; new_description.from_log_index = new_start_log_index; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index c7446c296f0..5d08c2825f5 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -192,15 +192,15 @@ bool KeeperServer::isLeaderAlive() const nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */) { + if (initialized_flag) + return nuraft::cb_func::ReturnCode::Ok; + size_t last_commited = state_machine->last_commit_index(); size_t next_index = state_manager->getLogStore()->next_slot(); bool commited_store = false; if (next_index < last_commited || next_index - last_commited <= 1) commited_store = true; - if (initialized_flag) - return nuraft::cb_func::ReturnCode::Ok; - auto set_initialized = [this] () { std::unique_lock lock(initialized_mutex); From f9b1661372f94b42aee66993d9fcbefa946c64ce Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Apr 2021 15:44:06 +0300 Subject: [PATCH 086/152] fix test 01702_system_query_log --- 
.../01702_system_query_log.reference | 26 +++++++------------ .../0_stateless/01702_system_query_log.sql | 26 +++++++------------ 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index 6d8908249bf..1f329feac22 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -42,23 +42,17 @@ Alter ALTER TABLE sqllt.table DROP COLUMN the_new_col; Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; -System SYSTEM RELOAD EMBEDDED DICTIONARIES; -System SYSTEM RELOAD DICTIONARIES; -System SYSTEM DROP DNS CACHE; -System SYSTEM DROP MARK CACHE; -System SYSTEM DROP UNCOMPRESSED CACHE; System SYSTEM FLUSH LOGS; -System SYSTEM RELOAD CONFIG; -System SYSTEM STOP MERGES; -System SYSTEM START MERGES; -System SYSTEM STOP TTL MERGES; -System SYSTEM START TTL MERGES; -System SYSTEM STOP MOVES; -System SYSTEM START MOVES; -System SYSTEM STOP FETCHES; -System SYSTEM START FETCHES; -System SYSTEM STOP REPLICATED SENDS; -System SYSTEM START REPLICATED SENDS; +System SYSTEM STOP MERGES sqllt.table +System SYSTEM START MERGES sqllt.table +System SYSTEM STOP TTL MERGES sqllt.table +System SYSTEM START TTL MERGES sqllt.table +System SYSTEM STOP MOVES sqllt.table +System SYSTEM START MOVES sqllt.table +System SYSTEM STOP FETCHES sqllt.table +System SYSTEM START FETCHES sqllt.table +System SYSTEM STOP REPLICATED SENDS sqllt.table +System SYSTEM START REPLICATED SENDS sqllt.table Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; SHOW CREATE TABLE sqllt.table FORMAT Null; SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/01702_system_query_log.sql b/tests/queries/0_stateless/01702_system_query_log.sql index 5c3de9cf912..e3ebf97edb7 100644 --- a/tests/queries/0_stateless/01702_system_query_log.sql +++ b/tests/queries/0_stateless/01702_system_query_log.sql @@ -64,23 +64,17 @@ ALTER TABLE sqllt.table DELETE WHERE i > 65535; -- SETTINGS PROFILE SELECT 'SYSTEM queries'; -SYSTEM RELOAD EMBEDDED DICTIONARIES; -SYSTEM RELOAD DICTIONARIES; -SYSTEM DROP DNS CACHE; -SYSTEM DROP MARK CACHE; -SYSTEM DROP UNCOMPRESSED CACHE; SYSTEM FLUSH LOGS; -SYSTEM RELOAD CONFIG; -SYSTEM STOP MERGES; -SYSTEM START MERGES; -SYSTEM STOP TTL MERGES; -SYSTEM START TTL MERGES; -SYSTEM STOP MOVES; -SYSTEM START MOVES; -SYSTEM STOP FETCHES; -SYSTEM START FETCHES; -SYSTEM STOP REPLICATED SENDS; -SYSTEM START REPLICATED SENDS; +SYSTEM STOP MERGES sqllt.table; +SYSTEM START MERGES sqllt.table; +SYSTEM STOP TTL MERGES sqllt.table; +SYSTEM START TTL MERGES sqllt.table; +SYSTEM STOP MOVES sqllt.table; +SYSTEM START MOVES sqllt.table; +SYSTEM STOP FETCHES sqllt.table; +SYSTEM START FETCHES sqllt.table; +SYSTEM STOP REPLICATED SENDS sqllt.table; +SYSTEM START REPLICATED SENDS sqllt.table; -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; 
-- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1) -- DROP REPLICA From d913f327ed546758e9e7fe7e63c300d0740a8295 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 16:42:38 +0300 Subject: [PATCH 087/152] Buildable code --- src/Server/InterserverIOHTTPHandler.cpp | 4 ++-- src/Server/InterserverIOHTTPHandler.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index fcc26988f48..11d0beb21c2 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -104,7 +104,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe try { - if (checkAuthentication(request)) + if (auto [message, success] = checkAuthentication(request); success) { processQuery(request, response, used_output); used_output.out->finalize(); @@ -121,7 +121,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe { if (e.code() == ErrorCodes::WRONG_PASSWORD) { - response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED); + response.setStatusAndReason(HTTPServerResponse::HTTP_UNAUTHORIZED); if (!response.sent()) writeString("Unauthorized.", *used_output.out); LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI()); diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index 3701d3dd0f6..e1e5f0d6bc2 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -47,10 +47,9 @@ private: void processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output); - bool checkAuthentication(Poco::Net::HTTPServerRequest & request) const; + std::pair checkAuthentication(HTTPServerRequest & request) const; const std::string default_user; const std::string default_password; - std::pair checkAuthentication(HTTPServerRequest & request) const; }; } From 4633afe96281f3fcef57ce0c10aae347bf9b363c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 16:56:14 +0300 Subject: [PATCH 088/152] Some fixes --- src/Interpreters/InterserverCredentials.cpp | 2 +- src/Server/InterserverIOHTTPHandler.cpp | 27 ++++++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index bfe11ca9521..141b8b681aa 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -54,7 +54,7 @@ std::pair ConfigInterserverCredentials::isValidUser(const std::pai { const auto & valid = store.find(credentials); if (valid == store.end()) - throw Exception("Incorrect user or password in HTTP basic authentication: " + credentials.first, ErrorCodes::WRONG_PASSWORD); + return {"Incorrect user or password in HTTP basic authentication: " + credentials.first, false}; return {"", true}; } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 11d0beb21c2..f85dac7d4c3 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -27,18 +27,27 @@ namespace ErrorCodes std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServerRequest & request) const { - auto creds = server.context().getInterserverCredential(); - if (!request.hasCredentials()) - return creds->isValidUser(std::make_pair(default_user, default_password)); + auto 
server_credentials = server.context().getInterserverCredential(); + if (server_credentials) + { + if (!request.hasCredentials()) + return server_credentials->isValidUser(std::make_pair(default_user, default_password)); - String scheme, info; - request.getCredentials(scheme, info); + String scheme, info; + request.getCredentials(scheme, info); - if (scheme != "Basic") - throw Exception("Server requires HTTP Basic authentication but client provides another method", ErrorCodes::NOT_IMPLEMENTED); + if (scheme != "Basic") + return {"Server requires HTTP Basic authentication but client provides another method", false}; - Poco::Net::HTTPBasicCredentials credentials(info); - return creds->isValidUser(std::make_pair(credentials.getUsername(), credentials.getPassword())); + Poco::Net::HTTPBasicCredentials credentials(info); + return server_credentials->isValidUser(std::make_pair(credentials.getUsername(), credentials.getPassword())); + } + else if (request.hasCredentials()) + { + return {"Client requires HTTP Basic authentication, but server doesn't provide it", false}; + } + + return {"", true}; } void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) From b4fc13de8e85d63449d68e14692e744885f5c988 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 17:02:56 +0300 Subject: [PATCH 089/152] fix style --- src/Interpreters/InterserverCredentials.cpp | 1 - src/Server/InterserverIOHTTPHandler.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index 141b8b681aa..f1c4b1923c6 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -6,7 +6,6 @@ namespace DB namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; - extern const int WRONG_PASSWORD; } std::shared_ptr diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index f85dac7d4c3..a62a2db0a10 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int ABORTED; - extern const int NOT_IMPLEMENTED; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int WRONG_PASSWORD; } From 80d441ec9536dd93b3db3d03434dfb684f3d45a2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 19:11:29 +0300 Subject: [PATCH 090/152] Reset timeouts to default --- tests/config/config.d/database_replicated.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/config/config.d/database_replicated.xml b/tests/config/config.d/database_replicated.xml index c2e62f9645a..1e6871a525d 100644 --- a/tests/config/config.d/database_replicated.xml +++ b/tests/config/config.d/database_replicated.xml @@ -19,8 +19,8 @@ 1 - 5000 - 10000 + 10000 + 30000 1000 2000 4000 From d9ad06bd6d301476d637849af69b9e1118c24af8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 6 Apr 2021 19:24:56 +0300 Subject: [PATCH 091/152] fix ubsan --- src/Interpreters/WindowDescription.cpp | 32 +++++++++++++++++++ src/Parsers/ExpressionElementParsers.cpp | 24 -------------- src/Processors/Transforms/WindowTransform.cpp | 14 ++++++-- .../01591_window_functions.reference | 10 ++++-- .../0_stateless/01591_window_functions.sql | 11 +++++-- 5 files changed, 58 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 
a97ef41204a..05d75d4647e 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -86,6 +86,38 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { + // Check the validity of offsets. + if (type == WindowFrame::FrameType::Rows + || type == WindowFrame::FrameType::Groups) + { + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given.", + toString(type), + applyVisitor(FieldVisitorToString(), begin_offset), + Field::Types::toString(begin_offset.getType())); + } + + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given.", + toString(type), + applyVisitor(FieldVisitorToString(), end_offset), + Field::Types::toString(end_offset.getType())); + } + } + + // Check relative positioning of offsets. // UNBOUNDED PRECEDING end and UNBOUNDED FOLLOWING start should have been // forbidden at the parsing level. assert(!(begin_type == BoundaryType::Unbounded && !begin_preceding)); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 84c178790b2..3e635b2accc 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -580,18 +580,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if ((node->frame.type == WindowFrame::FrameType::Rows - || node->frame.type == WindowFrame::FrameType::Groups) - && !(value.getType() == Field::Types::UInt64 - || (value.getType() == Field::Types::Int64 - && value.get() >= 0))) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", - WindowFrame::toString(node->frame.type), - applyVisitor(FieldVisitorToString(), value), - Field::Types::toString(value.getType())); - } node->frame.begin_offset = value; node->frame.begin_type = WindowFrame::BoundaryType::Offset; } @@ -641,18 +629,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if ((node->frame.type == WindowFrame::FrameType::Rows - || node->frame.type == WindowFrame::FrameType::Groups) - && !(value.getType() == Field::Types::UInt64 - || (value.getType() == Field::Types::Int64 - && value.get() >= 0))) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", - WindowFrame::toString(node->frame.type), - applyVisitor(FieldVisitorToString(), value), - Field::Types::toString(value.getType())); - } node->frame.end_offset = value; node->frame.end_type = WindowFrame::BoundaryType::Offset; } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 16d028f0fc1..414e45dab38 100644 --- 
a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -257,10 +257,9 @@ WindowTransform::WindowTransform(const Block & input_header_, const IColumn * column = entry.column.get(); APPLY_FOR_TYPES(compareValuesWithOffset) - // Check that the offset type matches the window type. // Convert the offsets to the ORDER BY column type. We can't just check - // that it matches, because e.g. the int literals are always (U)Int64, - // but the column might be Int8 and so on. + // that the type matches, because e.g. the int literals are always + // (U)Int64, but the column might be Int8 and so on. if (window_description.frame.begin_type == WindowFrame::BoundaryType::Offset) { @@ -435,6 +434,9 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int offset) con assertValid(x); assert(offset <= 0); + // abs(offset) is less than INT_MAX, as checked in the parser, so + // this negation should always work. + assert(offset >= -INT_MAX); if (x.row >= static_cast(-offset)) { x.row -= -offset; @@ -1500,6 +1502,12 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction "The offset for function {} must be nonnegative, {} given", getName(), offset); } + if (offset > INT_MAX) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "The offset for function {} must be less than {}, {} given", + getName(), INT_MAX, offset); + } } const auto [target_row, offset_left] = transform->moveRowNumber( diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 9067ee8d955..afc20f67406 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -942,8 +942,9 @@ FROM numbers(2) ; 1 0 1 1 --- optimize_read_in_order conflicts with sorting for window functions, must --- be disabled. +-- optimize_read_in_order conflicts with sorting for window functions, check that +-- it is disabled. +drop table if exists window_mt; create table window_mt engine MergeTree order by number as select number, mod(number, 3) p from numbers(100); select number, count(*) over (partition by p) @@ -1096,7 +1097,7 @@ select count() over (order by toInt64(number) range between -1 preceding and unb select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } ----- a test with aggregate function that allocates memory in arena +-- a test with aggregate function that allocates memory in arena select sum(a[length(a)]) from ( select groupArray(number) over (partition by modulo(number, 11) @@ -1104,3 +1105,6 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; 49995000 +-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing +-- under UBSan. Should be limited to at most INT_MAX. 
+select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 85856dd797d..44b0bba0b27 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -329,8 +329,9 @@ SELECT FROM numbers(2) ; --- optimize_read_in_order conflicts with sorting for window functions, must --- be disabled. +-- optimize_read_in_order conflicts with sorting for window functions, check that +-- it is disabled. +drop table if exists window_mt; create table window_mt engine MergeTree order by number as select number, mod(number, 3) p from numbers(100); @@ -402,10 +403,14 @@ select count() over (order by toInt64(number) range between -1 following and unb select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } ----- a test with aggregate function that allocates memory in arena +-- a test with aggregate function that allocates memory in arena select sum(a[length(a)]) from ( select groupArray(number) over (partition by modulo(number, 11) order by modulo(number, 1111), number) a from numbers_mt(10000) ) settings max_block_size = 7; + +-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing +-- under UBSan. Should be limited to at most INT_MAX. +select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } From 95881bf2ddc62b30e61a8eca967183fa71e434be Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 6 Apr 2021 19:31:24 +0300 Subject: [PATCH 092/152] Fix duplicated headers --- src/Server/InterserverIOHTTPHandler.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index e1e5f0d6bc2..b85343f2abf 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -1,8 +1,5 @@ #pragma once -#include -#include -#include #include #include #include @@ -10,6 +7,7 @@ #include #include +#include namespace CurrentMetrics From f968f6bab69214000cef1307fd7865b47bc771f8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 6 Apr 2021 19:46:24 +0300 Subject: [PATCH 093/152] return prlimit for gcc, it was not its fault --- CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index acda71752ae..0d385f704ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,15 +78,20 @@ include (cmake/find/ccache.cmake) # Another way would be to use --ccache-skip option before clang++-11 to make # ccache ignore it. option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling." 
OFF) -# gcc10/gcc10/clang -fsanitize=memory is too heavy -if (ENABLE_CHECK_HEAVY_BUILDS AND NOT (SANITIZE STREQUAL "memory" OR COMPILER_GCC)) +if (ENABLE_CHECK_HEAVY_BUILDS) # set DATA (since RSS does not work since 2.6.x+) to 2G set (RLIMIT_DATA 5000000000) # set VIRT (RLIMIT_AS) to 10G (DATA*10) set (RLIMIT_AS 10000000000) # set CPU time limit to 600 seconds set (RLIMIT_CPU 600) - set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --verbose --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER}) + + # gcc10/gcc10/clang -fsanitize=memory is too heavy + if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) + set (RLIMIT_DATA 10000000000) + endif() + + set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER}) endif () if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")

From e7e17adc64b92914dd103938628b24d5941ddcf3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 6 Apr 2021 19:46:47 +0300 Subject: [PATCH 094/152] Update CHANGELOG.md --- CHANGELOG.md | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8590fefa66d..af5c6aa4a33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,166 @@
+## ClickHouse release 21.4
+
+### ClickHouse release 21.4.1 2021-04-02
+
+#### Backward Incompatible Change
+
+* The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)).
+* Column `keys` in table `system.dictionaries` was replaced with columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from the `system.dictionaries` table do not require the dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)).
+
+#### New Feature
+
+* Added function `dictGetOrNull`. It works like `dictGet`, but returns `Null` in case the key was not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity (see the usage sketch at the end of this release section). Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
+* Add function `timezoneOf` that returns the timezone name of `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added table function `dictionary`. It works the same way as the `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support `Nullable` type for `PolygonDictionary` attribute. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)).
+* Functions `dictGet`, `dictHas` use current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `ctime` option to `zookeeper-dump-tree`. It allows to dump node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)).
+* Add new optional clause `GRANTEES` for `CREATE/ALTER USER` commands. It specifies users or roles which are allowed to receive grants from this user on condition this user has also all required access granted with grant option. By default `GRANTEES ANY` is used which means a user with grant option can grant to anyone. Syntax: `CREATE USER ... GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]`. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add option `--backslash` for `clickhouse-format`, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasFL)).
+* Add new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. Also show actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add `_partition_id` virtual column for `MergeTree*` engines. Allow to prune partitions by `_partition_id`. Add `partitionID()` function to calculate partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)).
+* Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)).
+* Added `ExecutablePool` dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added new SQL command `ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'`. This command is needed to properly remove 'frozen' partitions from all disks. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Added `Grant`, `Revoke` and `System` values of `query_kind` column for corresponding queries in `system.query_log`. [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)).
+* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for `Nullable` type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for multiple attributes fetch with `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow customizing timeouts for http connections used for replication independently from other http timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)).
+* Supports implicit key type conversion for JOIN. [#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir](https://github.com/vdimir)).
+* Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)).
+* Zero-copy replication for `ReplicatedMergeTree` over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)).
+* Extended range of `DateTime64` to properly support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([Vasily Nemkov](https://github.com/Enmk)).
+* Support ThetaSketch to do set operations. [#22207](https://github.com/ClickHouse/ClickHouse/pull/22207) ([Ping Yu](https://github.com/pingyu)).
+
+#### Performance Improvement
+
+* Enable read with mmap IO for file ranges from 64 MiB (the setting `min_bytes_to_use_mmap_io`). It may lead to moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add cache for files read with `min_bytes_to_use_mmap_io` setting. It makes significant (2x and more) performance improvement when the value of the setting is small by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that make it less reliable in production (e.g. hung or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. [#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Avoid unnecessary data copy when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is by default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible). The `NONE` codec is useful in some cases: - when data is incompressible; - for synthetic benchmarks. [#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move columns that are in the sorting key to `PREWHERE`. [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)).
+* Supported parallel formatting in `clickhouse-local` and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Improved performance by replacing `memcpy` with another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Improvement
+
+* Better exception message in client in case of exception while server is writing blocks. In previous versions the client may get a misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix error `Directory tmp_fetch_XXX already exists` which could happen after a failed part fetch. Delete temporary fetch directory if it already exists. Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix MSan report for function `range` with `UInt256` argument (support for large integers is experimental). This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add `current_database` column to `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Add case-insensitive history search/navigation and subword movement features to `clickhouse-client`. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)).
+* Added possibility to migrate existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)).
+* If a tuple of NULLs, e.g. `(NULL, NULL)`, is on the left hand side of the `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))`, return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Convert `system.errors.stack_trace` from `String` into `Array(UInt64)` (this should decrease the overhead of error collection). [#22058](https://github.com/ClickHouse/ClickHouse/pull/22058) ([Azat Khuzhin](https://github.com/azat)).
+* Update used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Added case insensitive aliases for `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028). [#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)).
+* Add option `strict_increase` to `windowFunnel` function to calculate each event once (resolve [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir](https://github.com/vdimir)).
+* If partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to `system.parts_columns` table (this was inconsistent with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Supported `replication_alter_partitions_sync=1` setting for moving partitions from helping table to destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([turbo jason](https://github.com/songenjie)).
+* Show path to data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([tavplubix](https://github.com/tavplubix)).
+* Support `RANGE OFFSET` frame for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame (see the usage sketch at the end of this release section). They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Add profile event `HedgedRequestsChangeReplica`, change read data timeout from sec to ms. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add connection pool for PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* DiskS3 (experimental feature under development). Fixed a bug that made it impossible to move a directory if the destination is not empty and a cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Better formatting for `Array` and `Map` data types in Web UI. [#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support non-default table schema for postgres storage/table-function. Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support replicas priority for postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)).
+* Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. Added `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. Miscellaneous fixes and improvements for `Replicated` database engine. [#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([tavplubix](https://github.com/tavplubix)).
+* If `PODArray` was instantiated with element size that is neither a fraction nor a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)).
+* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes #21383. [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)).
+* Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)).
+* `Age` and `Precision` in graphite rollup configs should increase from retention to retention. Now it's checked and the wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Improve `clickhouse-format` to not throw exception when there are extra spaces or a comment after the last query, and throw an exception early with a readable message when formatting `ASTInsertQuery` with data. [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasFL)).
+* Improve support of integer keys in data type `Map`. [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)).
+* MaterializeMySQL: Attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)).
+* Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir](https://github.com/vdimir)).
+* Do not create empty parts on INSERT when the `optimize_on_insert` setting is enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)).
+* `MaterializeMySQL`: add minmax skipping index for `_version` column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)).
+* Improve performance of aggregation in order of sorting key (with enabled setting `optimize_aggregation_in_order`). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)).
+* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)).
+
+#### Bug Fix
+
+* Remove socket from epoll before cancelling packet receiver in `HedgedConnections` to prevent a possible race. I hope it fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed bug in S3 zero-copy replication for hybrid storage. [#22378](https://github.com/ClickHouse/ClickHouse/pull/22378) ([ianton-ru](https://github.com/ianton-ru)).
+* Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)).
+* Fix exception which may happen when `SELECT` has a constant `WHERE` condition and the source table has columns whose names are digits. [#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)).
+* Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)).
+* Fix uncaught exception in `InterserverIOHTTPHandler`. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)).
+* Fix docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)).
+* Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir](https://github.com/vdimir)).
+* Fix the name of the background thread pool used to poll messages from Kafka. A Kafka engine with the broken thread pool would not consume messages from the message queue. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)).
+* Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)).
+* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy Linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)).
+* Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)).
+* The function `decrypt` was lacking a check for the minimal size of data encrypted in `AEAD` mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* In rare cases, a merge for `CollapsingMergeTree` may create a granule with `index_granularity + 1` rows. Because of this, an internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause a significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)).
+* Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)).
+* Fix reading the HTTP POST request with "multipart/form-data" content type. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)).
+* Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix incorrect query result (and possible crash) which could happen when `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Better error handling and logging in `WriteBufferFromS3`. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. This is a follow-up fix of [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365). It can only be reproduced in a production environment. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)).
+* Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896). [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)).
+* Fix bug for `ReplicatedMerge` table engines when an `ALTER MODIFY COLUMN` query doesn't change the type of a decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)).
+* Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)).
+* Fix function `arrayElement` with type `Map` for constant integer arguments. [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix SIGSEGV on non-existing attributes from `ip_trie` with `access_to_key_from_attributes`. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)).
+* Server now starts accepting connections only after `DDLWorker` and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)).
+* Add type conversion for `StorageJoin` keys (previously led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)).
+* Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `fsync_part_directory` for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)).
+* Remove unknown columns from joined table in `WHERE` for queries to external database engines (MySQL, PostgreSQL). close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir](https://github.com/vdimir)).
+* `std::terminate` was called if there was an error writing data into s3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir](https://github.com/vdimir)).
+* Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)).
+* In case a query has a constant `WHERE` condition, and the setting `optimize_skip_unused_shards` is enabled, all shards may be skipped and the query could return an incorrect empty result.
[#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Fix incorrect `fd_ready` assignment in NuKeeperTCPHandler. [#21544](https://github.com/ClickHouse/ClickHouse/pull/21544) ([小路](https://github.com/nicelulu)). +* Fix table function `clusterAllReplicas` returns wrong `_shard_num`. close [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasFL)). +* Fix that S3 table holds old credentials after config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). +* Fixed race on SSL object inside `SecureSocket` in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `Avro` format parsing for `Kafka`. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). +* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix official website documents which introduced cluster secret feature. [#21331](https://github.com/ClickHouse/ClickHouse/pull/21331) ([Chao Ma](https://github.com/godliness)). +* `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)). +* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)). +* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Build/Testing/Packaging Improvement + +* Enable the bundled openldap on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix CMake error about internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix Fedora\RHEL\CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). 
+* Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fixed compiling on `ppc64le` and use the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Added a way to check memory info for the RBAC testflows tests. [#22403](https://github.com/ClickHouse/ClickHouse/pull/22403) ([MyroTk](https://github.com/MyroTk)). +* Fix test with MaterializeMySQL. MySQL is started only once with MaterializeMySQL integration test. Fixes [#22289](https://github.com/ClickHouse/ClickHouse/issues/22289). [#22341](https://github.com/ClickHouse/ClickHouse/pull/22341) ([Winter Zhang](https://github.com/zhang2014)). +* Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)). +* Enable status check for SQLancer CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). +* Add `tzdata` to Docker containers because reading `ORC` formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Introduce 2 arguments for `clickhouse-server` image Dockerfile: `deb_location` & `single_binary_location`. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)). +* Allow to use clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove decode method with python3. [#21832](https://github.com/ClickHouse/ClickHouse/pull/21832) ([kevin wan](https://github.com/MaxWk)). +* Add [jepsen](https://github.com/jepsen-io/jepsen) tests for NuKeeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). +* Updating TestFlows to 1.6.74. [#21673](https://github.com/ClickHouse/ClickHouse/pull/21673) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add llvm-12 binaries name to search in cmake scripts. Implicit constants conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in `readpassphrase` library. Deprecated `-fuse-ld` changed to `--ld-path` for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)). +* Updating `docker/test/testflows/runner/dockerd-entrypoint.sh` to use Yandex dockerhub-proxy. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fixing LDAP authentication performance test by removing assertion. [#21507](https://github.com/ClickHouse/ClickHouse/pull/21507) ([vzakaznikov](https://github.com/vzakaznikov)). +* Added `ALL` and `NONE` privilege tests. [#21354](https://github.com/ClickHouse/ClickHouse/pull/21354) ([MyroTk](https://github.com/MyroTk)). +* Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)). 
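A usage sketch for two of the features announced in this release section: the hierarchy-aware dictionary functions and the frame-aware window functions. This is an illustrative sketch, not taken from the patches above; the dictionary name `regions` and its `name` attribute are invented, and in this release window functions are still gated behind `allow_experimental_window_functions`.

    -- hypothetical hierarchical dictionary `regions` with UInt64 keys and a String attribute `name`
    SELECT
        dictGetOrNull('regions', 'name', toUInt64(42)) AS name_or_null,    -- Null when key 42 is absent
        dictGetChildren('regions', toUInt64(1))        AS direct_children, -- inverse of dictGetHierarchy
        dictGetDescendants('regions', toUInt64(1), 0)  AS all_descendants; -- level 0 means unlimited depth

    -- lagInFrame is analogous to lag, but respects the window frame
    SET allow_experimental_window_functions = 1;
    SELECT
        number,
        lagInFrame(number, 1) OVER (ORDER BY number ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS prev
    FROM numbers(5);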
+ ## ClickHouse release 21.3 (LTS) ### ClickHouse release v21.3, 2021-03-12 From b25a4e066c983439f5b00f1fa55a357c20651d27 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Apr 2021 12:23:04 +0300 Subject: [PATCH 095/152] Lock MEMORY_LIMIT_EXCEEDED in ThreadStatus::detachQuery() Found with fuzzer [1]: BaseDaemon: (version 21.5.1.6440, build id: 3B097C902DDAA35688D90750552ED499DC5D10A0) (from thread 8012) Terminate called for uncaught exception: Code: 241, e.displayText() = DB::Exception: Memory limit (for user) exceeded: would use 153.51 MiB (attempt to allocate chunk of 4194368 bytes), maximum: 150.00 MiB, Stack trace (when copying this message, always include the lines below): 0. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:133: Poco::Exception::Exception(std::__1::basic_string, std::__1::allocator > const&, int) @ 0x26fdcdd9 in /usr/bin/clickhouse 1. ./obj-x86_64-linux-gnu/../src/Common/Exception.cpp:57: DB::Exception::Exception(std::__1::basic_string, std::__1::allocator > const&, int, bool) @ 0xad0df02 in /usr/bin/clickhouse 2. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/string:1444: DB::Exception::Exception, std::__1::allocator >, long&, std::__1::basic_string, std::__1::allocator > >(int, std::__1::basic_string, std::__1::allocator > const&, char const*&&, char const*&&, std::__1::basic_string, std::__1::allocator >&&, long&, std::__1::basic_string, std::__1::allocator >&&) @ 0xacc7bef in /usr/bin/clickhouse 3. ./obj-x86_64-linux-gnu/../src/Common/MemoryTracker.cpp:219: MemoryTracker::alloc(long) @ 0xacc65eb in /usr/bin/clickhouse 4. ./obj-x86_64-linux-gnu/../src/Common/MemoryTracker.cpp:0: MemoryTracker::alloc(long) @ 0xacc5dad in /usr/bin/clickhouse 5. ./obj-x86_64-linux-gnu/../src/Common/MemoryTracker.cpp:0: MemoryTracker::alloc(long) @ 0xacc5dad in /usr/bin/clickhouse 6. ./obj-x86_64-linux-gnu/../src/Common/AllocatorWithMemoryTracking.h:35: AllocatorWithMemoryTracking::allocate(unsigned long) @ 0xad0a2fe in /usr/bin/clickhouse 7. void std::__1::vector >::__push_back_slow_path(DB::Field&&) @ 0x11712a51 in /usr/bin/clickhouse 8. ./obj-x86_64-linux-gnu/../src/Interpreters/ThreadStatusExt.cpp:356: DB::ThreadStatus::detachQuery(bool, bool) @ 0x1f5d5237 in /usr/bin/clickhouse 9. ./obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:0: void std::__1::__function::__policy_invoker::__call_impl(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'(), void ()> >(std::__1::__function::__policy_storage const*) @ 0x20c488e6 in /usr/bin/clickhouse 10. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:0: ThreadPoolImpl::worker(std::__1::__list_iterator) @ 0xad9f6cc in /usr/bin/clickhouse 11. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:1655: void* std::__1::__thread_proxy >, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()> >(void*) @ 0xada8264 in /usr/bin/clickhouse 12. start_thread @ 0x9609 in /usr/lib/x86_64-linux-gnu/libpthread-2.31.so 13. 
__clone @ 0x122293 in /usr/lib/x86_64-linux-gnu/libc-2.31.so (version 21.5.1.6440) [1]: https://clickhouse-test-reports.s3.yandex.net/22583/69296876005c0fa171c755f8b224e4d58192c402/stress_test_(address).html#fail1 --- src/Interpreters/ThreadStatusExt.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 8a979721290..c45902ae497 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -316,6 +316,8 @@ void ThreadStatus::finalizeQueryProfiler() void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) { + MemoryTracker::LockExceptionInThread lock; + if (exit_if_already_detached && thread_state == ThreadState::DetachedFromQuery) { thread_state = thread_exits ? ThreadState::Died : ThreadState::DetachedFromQuery; From 9394e6e5edc92a1de19957fabdc26a5f9ad94e27 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Apr 2021 12:45:30 +0300 Subject: [PATCH 096/152] Add SCOPE_EXIT_SAFE/SCOPE_EXIT_MEMORY_SAFE helpers --- base/ext/scope_guard_safe.h | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 base/ext/scope_guard_safe.h diff --git a/base/ext/scope_guard_safe.h b/base/ext/scope_guard_safe.h new file mode 100644 index 00000000000..7cfb3959a81 --- /dev/null +++ b/base/ext/scope_guard_safe.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include +#include + +/// Same as SCOPE_EXIT() but block the MEMORY_LIMIT_EXCEEDED errors. +/// +/// Typical example of SCOPE_EXIT_MEMORY() usage is when code under it may do +/// some tiny allocations, that may fail under high memory pressure or/and low +/// max_memory_usage (and related limits). +/// +/// NOTE: it should be used with caution. +#define SCOPE_EXIT_MEMORY(...) SCOPE_EXIT( \ + MemoryTracker::LockExceptionInThread lock_memory_tracker; \ + __VA_ARGS__; \ +) + +/// Same as SCOPE_EXIT() but try/catch/tryLogCurrentException any exceptions. +/// +/// SCOPE_EXIT_SAFE() should be used in case the exception during the code +/// under SCOPE_EXIT() is not "that fatal" and error message in log is enough. +/// +/// Good example is calling CurrentThread::detachQueryIfNotDetached(). +/// +/// Anti-pattern is calling WriteBuffer::finalize() under SCOPE_EXIT_SAFE() +/// (since finalize() can do final write and it is better to fail abnormally +/// instead of ignoring write error). +/// +/// NOTE: it should be used with double caution. +#define SCOPE_EXIT_SAFE(...) SCOPE_EXIT( \ + try \ + { \ + __VA_ARGS__; \ + } \ + catch (...) \ + { \ + tryLogCurrentException(__PRETTY_FUNCTION__); \ + } \ +) + +/// Same as SCOPE_EXIT() but: +/// - block the MEMORY_LIMIT_EXCEEDED errors, +/// - try/catch/tryLogCurrentException any exceptions. +/// +/// SCOPE_EXIT_MEMORY_SAFE() can be used when the error can be ignored, and in +/// addition to SCOPE_EXIT_SAFE() it will also lock MEMORY_LIMIT_EXCEEDED to +/// avoid such exceptions. +/// +/// It does exists as a separate helper, since you do not need to lock +/// MEMORY_LIMIT_EXCEEDED always (there are cases when code under SCOPE_EXIT does +/// not do any allocations, while LockExceptionInThread increment atomic +/// variable). +/// +/// NOTE: it should be used with triple caution. +#define SCOPE_EXIT_MEMORY_SAFE(...) SCOPE_EXIT( \ + try \ + { \ + MemoryTracker::LockExceptionInThread lock_memory_tracker; \ + __VA_ARGS__; \ + } \ + catch (...) 
\ + { \ + tryLogCurrentException(__PRETTY_FUNCTION__); \ + } \ +) From f157278b729d3db6de33d7c0510690a1d2468956 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Apr 2021 12:23:40 +0300 Subject: [PATCH 097/152] Safer SCOPE_EXIT It executes the code in the dtor, that should never throw. --- programs/client/Client.cpp | 4 ++-- programs/copier/ClusterCopierApp.cpp | 3 ++- src/Databases/DatabaseLazy.cpp | 14 +++++++------- src/Functions/array/arrayReduce.cpp | 4 ++-- src/Functions/array/arrayReduceInRanges.cpp | 6 +++--- src/Functions/initializeAggregation.cpp | 4 ++-- src/Functions/runningAccumulate.cpp | 4 ++-- src/Interpreters/DDLWorker.cpp | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Processors/Executors/PipelineExecutor.cpp | 17 +++++++++-------- .../Executors/PullingAsyncPipelineExecutor.cpp | 4 ++-- .../Formats/Impl/ParallelParsingInputFormat.cpp | 6 +++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++++--- 14 files changed, 44 insertions(+), 40 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 0c5bbaf3edd..555b1adc414 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -1610,7 +1610,7 @@ private: { /// Temporarily apply query settings to context. std::optional old_settings; - SCOPE_EXIT({ if (old_settings) context.setSettings(*old_settings); }); + SCOPE_EXIT_SAFE({ if (old_settings) context.setSettings(*old_settings); }); auto apply_query_settings = [&](const IAST & settings_ast) { if (!old_settings) diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index e3169a49ecf..7dfadc87716 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -112,7 +113,7 @@ void ClusterCopierApp::mainImpl() SharedContextHolder shared_context = Context::createShared(); auto context = std::make_unique(Context::createGlobal(shared_context.get())); context->makeGlobalContext(); - SCOPE_EXIT(context->shutdown()); + SCOPE_EXIT_SAFE(context->shutdown()); context->setConfig(loaded_config.configuration); context->setApplicationType(Context::ApplicationType::LOCAL); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index f297bf2c82f..b1bc55bf58d 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -61,7 +61,7 @@ void DatabaseLazy::createTable( const StoragePtr & table, const ASTPtr & query) { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); if (!endsWith(table->getName(), "Log")) throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); DatabaseOnDisk::createTable(context, table_name, table, query); @@ -78,7 +78,7 @@ void DatabaseLazy::dropTable( const String & table_name, bool no_delay) { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); DatabaseOnDisk::dropTable(context, table_name, no_delay); } @@ -90,7 +90,7 @@ void DatabaseLazy::renameTable( bool exchange, bool dictionary) { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, exchange, dictionary); } @@ -115,14 
+115,14 @@ void DatabaseLazy::alterTable( bool DatabaseLazy::isTableExist(const String & table_name) const { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); std::lock_guard lock(mutex); return tables_cache.find(table_name) != tables_cache.end(); } StoragePtr DatabaseLazy::tryGetTable(const String & table_name) const { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); { std::lock_guard lock(mutex); auto it = tables_cache.find(table_name); @@ -224,7 +224,7 @@ DatabaseLazy::~DatabaseLazy() StoragePtr DatabaseLazy::loadTable(const String & table_name) const { - SCOPE_EXIT({ clearExpiredTables(); }); + SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); }); LOG_DEBUG(log, "Load table {} to cache.", backQuote(table_name)); diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 342a4b18854..0611a4f5d0c 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include namespace DB @@ -172,7 +172,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume } } - SCOPE_EXIT({ + SCOPE_EXIT_MEMORY_SAFE({ for (size_t i = 0; i < input_rows_count; ++i) agg_func.destroy(places[i]); }); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 9839d2a8fe7..55e4d81f36c 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include namespace DB @@ -252,7 +252,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(const ColumnsWithTypeAndName } } - SCOPE_EXIT({ + SCOPE_EXIT_MEMORY_SAFE({ for (size_t j = 0; j < place_total; ++j) agg_func.destroy(places[j]); }); @@ -331,7 +331,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(const ColumnsWithTypeAndName AggregateDataPtr place = arena->alignedAlloc(agg_func.sizeOfData(), agg_func.alignOfData()); agg_func.create(place); - SCOPE_EXIT({ + SCOPE_EXIT_MEMORY_SAFE({ agg_func.destroy(place); }); diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 4b90b7956c5..76a885fd730 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace DB @@ -132,7 +132,7 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam } } - SCOPE_EXIT({ + SCOPE_EXIT_MEMORY_SAFE({ for (size_t i = 0; i < input_rows_count; ++i) agg_func.destroy(places[i]); }); diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 1a01b88e22c..f3f142bb846 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB @@ -104,7 +104,7 @@ public: const auto & states = column_with_states->getData(); bool state_created = false; - SCOPE_EXIT({ + SCOPE_EXIT_MEMORY_SAFE({ if (state_created) agg_func.destroy(place.data()); }); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index eceb48ae773..9021f48057e 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -820,7 +821,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( zookeeper->set(tries_to_execute_path, toString(counter + 1)); 
task.ops.push_back(create_shard_flag); - SCOPE_EXIT({ if (!executed_by_us && !task.ops.empty()) task.ops.pop_back(); }); + SCOPE_EXIT_MEMORY({ if (!executed_by_us && !task.ops.empty()) task.ops.pop_back(); }); /// If the leader will unexpectedly changed this method will return false /// and on the next iteration new leader will take lock diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 1f6b0c37437..1f440aef817 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -82,7 +82,7 @@ #include #include #include -#include +#include #include @@ -1401,7 +1401,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc AggregateDataPtr place = state.data(); agg_count.create(place); - SCOPE_EXIT(agg_count.destroy(place)); + SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); agg_count.set(place, *num_rows); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index a724f22ed31..b1751dfd030 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -1,14 +1,15 @@ -#include #include #include -#include #include -#include #include -#include #include +#include +#include +#include +#include #include #include +#include #ifndef NDEBUG #include @@ -740,7 +741,7 @@ void PipelineExecutor::executeImpl(size_t num_threads) bool finished_flag = false; - SCOPE_EXIT( + SCOPE_EXIT_SAFE( if (!finished_flag) { finish(); @@ -766,9 +767,9 @@ void PipelineExecutor::executeImpl(size_t num_threads) if (thread_group) CurrentThread::attachTo(thread_group); - SCOPE_EXIT( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); ); try diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index f1626414375..9f1999bc4a3 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include namespace DB { @@ -72,7 +72,7 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachTo(thread_group); - SCOPE_EXIT( + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachQueryIfNotDetached(); ); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 1ad913a1a59..f295fe00299 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -2,14 +2,14 @@ #include #include #include -#include +#include namespace DB { void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group) { - SCOPE_EXIT( + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachQueryIfNotDetached(); ); @@ -60,7 +60,7 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number) { - SCOPE_EXIT( + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachQueryIfNotDetached(); ); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f3759107912..742ebafaf5c 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1,5 +1,5 @@ #include /// For calculations related to sampling coefficients. -#include +#include #include #include @@ -704,7 +704,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( for (size_t part_index = 0; part_index < parts.size(); ++part_index) pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] { - SCOPE_EXIT( + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachQueryIfNotDetached(); ); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1cc7c7299fa..73f0a7907e5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -59,7 +60,7 @@ #include #include -#include "Storages/MergeTree/MergeTreeReaderCompact.h" +#include #include #include @@ -3693,7 +3694,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora } } - SCOPE_EXIT + SCOPE_EXIT_MEMORY ({ std::lock_guard lock(currently_fetching_parts_mutex); currently_fetching_parts.erase(part_name); @@ -3901,7 +3902,7 @@ bool StorageReplicatedMergeTree::fetchExistsPart(const String & part_name, const } } - SCOPE_EXIT + SCOPE_EXIT_MEMORY ({ std::lock_guard lock(currently_fetching_parts_mutex); currently_fetching_parts.erase(part_name); From c4a7e8128700f999f30d1c8a348abdcdbade1178 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 6 Apr 2021 21:12:40 +0300 Subject: [PATCH 098/152] Add metric to track how much time is spent waiting for the Buffer layer lock It uses the very fast CLOCK_MONOTONIC_COARSE, so this should not be a problem. Also note that there is no sense in using microseconds/nanoseconds, since the accuracy of CLOCK_MONOTONIC_COARSE is usually milliseconds.
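For illustration, the accounting pattern introduced by this patch boils down to a few lines: take the mutex with defer_lock, time only the acquisition with the coarse monotonic clock, and add the elapsed milliseconds to a counter. The sketch below is a minimal standalone version; lock_wait_ms and now_coarse_ms() are hypothetical stand-ins for ClickHouse's ProfileEvents counters and Stopwatch, so only the structure mirrors the actual change.

    #include <atomic>
    #include <cstdint>
    #include <mutex>
    #include <time.h>

    /// Hypothetical stand-in for ProfileEvents::increment(...).
    static std::atomic<uint64_t> lock_wait_ms{0};

    /// CLOCK_MONOTONIC_COARSE (Linux-specific) has roughly millisecond
    /// resolution but is much cheaper to read than CLOCK_MONOTONIC.
    static uint64_t now_coarse_ms()
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
        return static_cast<uint64_t>(ts.tv_sec) * 1000 + static_cast<uint64_t>(ts.tv_nsec) / 1000000;
    }

    std::unique_lock<std::mutex> lockAndAccount(std::mutex & mutex)
    {
        std::unique_lock<std::mutex> lock(mutex, std::defer_lock);
        const uint64_t start = now_coarse_ms();
        lock.lock();                              /// Only the wait for the lock is timed,
        lock_wait_ms += now_coarse_ms() - start;  /// not the time the lock is held.
        return lock;
    }

Timing just the acquisition is the point of the metric: it measures contention between readers, writers and background flushes of a Buffer table, which is what one would tune the number of Buffer layers against.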
--- src/Common/ProfileEvents.cpp | 2 ++ src/Storages/StorageBuffer.cpp | 48 ++++++++++++++++++++++++++++------ src/Storages/StorageBuffer.h | 8 ++++++ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 35703790d82..d0876c5e69c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -146,6 +146,8 @@ M(StorageBufferPassedTimeMaxThreshold, "") \ M(StorageBufferPassedRowsMaxThreshold, "") \ M(StorageBufferPassedBytesMaxThreshold, "") \ + M(StorageBufferLayerLockReadersWaitMilliseconds, "Time for waiting for Buffer layer during reading") \ + M(StorageBufferLayerLockWritersWaitMilliseconds, "Time for waiting free Buffer layer to write to (can be used to tune Buffer layers)") \ \ M(DictCacheKeysRequested, "") \ M(DictCacheKeysRequestedMiss, "") \ diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 6dc32f4c880..1e205c49d82 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -40,6 +40,8 @@ namespace ProfileEvents extern const Event StorageBufferPassedTimeMaxThreshold; extern const Event StorageBufferPassedRowsMaxThreshold; extern const Event StorageBufferPassedBytesMaxThreshold; + extern const Event StorageBufferLayerLockReadersWaitMilliseconds; + extern const Event StorageBufferLayerLockWritersWaitMilliseconds; } namespace CurrentMetrics @@ -63,6 +65,36 @@ namespace ErrorCodes } +std::unique_lock StorageBuffer::Buffer::lockForReading() const +{ + return lockImpl(/* read= */true); +} +std::unique_lock StorageBuffer::Buffer::lockForWriting() const +{ + return lockImpl(/* read= */false); +} +std::unique_lock StorageBuffer::Buffer::tryLock() const +{ + std::unique_lock lock(mutex, std::try_to_lock); + return lock; +} +std::unique_lock StorageBuffer::Buffer::lockImpl(bool read) const +{ + std::unique_lock lock(mutex, std::defer_lock); + + Stopwatch watch(CLOCK_MONOTONIC_COARSE); + lock.lock(); + UInt64 elapsed = watch.elapsedMilliseconds(); + + if (read) + ProfileEvents::increment(ProfileEvents::StorageBufferLayerLockReadersWaitMilliseconds, elapsed); + else + ProfileEvents::increment(ProfileEvents::StorageBufferLayerLockWritersWaitMilliseconds, elapsed); + + return lock; +} + + StorageBuffer::StorageBuffer( const StorageID & table_id_, const ColumnsDescription & columns_, @@ -111,7 +143,7 @@ protected: return res; has_been_read = true; - std::lock_guard lock(buffer.mutex); + std::unique_lock lock(buffer.lockForReading()); if (!buffer.data.rows()) return res; @@ -528,7 +560,7 @@ public: for (size_t try_no = 0; try_no < storage.num_shards; ++try_no) { - std::unique_lock lock(storage.buffers[shard_num].mutex, std::try_to_lock); + std::unique_lock lock(storage.buffers[shard_num].tryLock()); if (lock.owns_lock()) { @@ -548,7 +580,7 @@ public: if (!least_busy_buffer) { least_busy_buffer = &storage.buffers[start_shard_num]; - least_busy_lock = std::unique_lock(least_busy_buffer->mutex); + least_busy_lock = least_busy_buffer->lockForWriting(); } insertIntoBuffer(block, *least_busy_buffer); least_busy_lock.unlock(); @@ -740,9 +772,9 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds, bool loc size_t bytes = 0; time_t time_passed = 0; - std::unique_lock lock(buffer.mutex, std::defer_lock); + std::optional> lock; if (!locked) - lock.lock(); + lock.emplace(buffer.lockForReading()); block_to_write = buffer.data.cloneEmpty(); @@ -910,7 +942,7 @@ void StorageBuffer::reschedule() /// try_to_lock is also ok for background flush, 
since if there is /// INSERT contended, then the reschedule will be done after /// INSERT will be done. - std::unique_lock lock(buffer.mutex, std::try_to_lock); + std::unique_lock lock(buffer.tryLock()); if (lock.owns_lock()) { min_first_write_time = buffer.first_write_time; @@ -967,7 +999,7 @@ std::optional StorageBuffer::totalRows(const Settings & settings) const UInt64 rows = 0; for (const auto & buffer : buffers) { - std::lock_guard lock(buffer.mutex); + const auto lock(buffer.lockForReading()); rows += buffer.data.rows(); } return rows + *underlying_rows; @@ -978,7 +1010,7 @@ std::optional StorageBuffer::totalBytes(const Settings & /*settings*/) c UInt64 bytes = 0; for (const auto & buffer : buffers) { - std::lock_guard lock(buffer.mutex); + const auto lock(buffer.lockForReading()); bytes += buffer.data.allocatedBytes(); } return bytes; diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index f6904ddb0e4..5d72e32544c 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -118,7 +118,15 @@ private: { time_t first_write_time = 0; Block data; + + std::unique_lock lockForReading() const; + std::unique_lock lockForWriting() const; + std::unique_lock tryLock() const; + + private: mutable std::mutex mutex; + + std::unique_lock lockImpl(bool read) const; }; /// There are `num_shards` of independent buffers. From 6f9bad682f399d402b2faf7f69c60902a83b36b1 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 25 Feb 2021 19:07:16 +0800 Subject: [PATCH 099/152] Only read needed columns for column-oriented formats such as Parquet --- src/Storages/StorageFile.cpp | 16 +------ src/Storages/StorageInMemoryMetadata.cpp | 56 ++++++++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 10 ++++- 3 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5524569e1f0..0267187b12d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -243,20 +243,6 @@ public: using FilesInfoPtr = std::shared_ptr; - static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) - { - auto header = metadata_snapshot->getSampleBlock(); - - /// Note: AddingDefaultsBlockInputStream doesn't change header.
- - if (need_path_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); - if (need_file_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); - - return header; - } - StorageFileSource( std::shared_ptr storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -264,7 +250,7 @@ public: UInt64 max_block_size_, FilesInfoPtr files_info_, ColumnsDescription columns_description_) - : SourceWithProgress(getHeader(metadata_snapshot_, files_info_->need_path_column, files_info_->need_file_column)) + : SourceWithProgress(metadata_snapshot_->getSampleBlockForColumns(columns_description_.getNamesOfPhysical(), storage_->getVirtuals(), storage_->getStorageID())) , storage(std::move(storage_)) , metadata_snapshot(metadata_snapshot_) , files_info(std::move(files_info_)) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 871ff38c07f..10c5a9f26e2 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -106,6 +106,39 @@ const ColumnsDescription & StorageInMemoryMetadata::getColumns() const return columns; } +const ColumnsDescription StorageInMemoryMetadata::getColumnsForNames( + const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const +{ + ColumnsDescription res; + std::unordered_map columns_map; + + NamesAndTypesList all_columns = getColumns().getAll(); + for (const auto & elem : all_columns) + columns_map.emplace(elem.name, elem.type); + + /// Virtual columns must be appended after ordinary, because user can + /// override them. + for (const auto & column : virtuals) + columns_map.emplace(column.name, column.type); + + for (const auto & name : column_names) + { + auto it = columns_map.find(name); + if (it != columns_map.end()) + { + res.add(ColumnDescription(it->first, it->second)); + } + else + { + throw Exception( + "Column " + backQuote(name) + " not found in table " + storage_id.getNameForLogs(), + ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + } + } + + return res; +} + const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const { return secondary_indices; @@ -320,6 +353,29 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( return res; } +Block StorageInMemoryMetadata::getSampleBlockForColumns( + const Names & column_names) const +{ + Block res; + + std::unordered_map columns_map; + + NamesAndTypesList all_columns = getColumns().getAll(); + for (const auto & elem : all_columns) + columns_map.emplace(elem.name, elem.type); + + for (const auto & name : column_names) + { + auto it = columns_map.find(name); + if (it != columns_map.end()) + { + res.insert({it->second->createColumn(), it->second, it->first}); + } + } + return res; +} + + const KeyDescription & StorageInMemoryMetadata::getPartitionKey() const { return partition_key; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 038416aff7d..914d64340f1 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -85,6 +85,12 @@ struct StorageInMemoryMetadata /// Returns combined set of columns const ColumnsDescription & getColumns() const; + + /// Returns combined set of columns from give column Names + const ColumnsDescription getColumnsForNames( + const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; + + /// Returns secondary indices const IndicesDescription & getSecondaryIndices() 
const; @@ -147,7 +153,9 @@ struct StorageInMemoryMetadata /// message. Block getSampleBlockForColumns( const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; - + /// Virtual columns not included. can be used in the InputFormat for reading for example + Block getSampleBlockForColumns( + const Names & column_names) const; /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. From de365a58526253653dbbb4e27a2ad06beef982bf Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 26 Feb 2021 00:05:04 +0800 Subject: [PATCH 100/152] fix --- src/Storages/StorageFile.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0267187b12d..cfa78d92b0f 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -330,8 +330,9 @@ public: } read_buf = wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method); - auto format = FormatFactory::instance().getInput( - storage->format_name, *read_buf, metadata_snapshot->getSampleBlock(), context, max_block_size, storage->format_settings); + auto format = FormatFactory::instance().getInput(storage->format_name, *read_buf, + metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()), + context, max_block_size, storage->format_settings); reader = std::make_shared(format); @@ -444,8 +445,7 @@ Pipe StorageFile::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - this_ptr, metadata_snapshot, context, max_block_size, files_info, - metadata_snapshot->getColumns())); + this_ptr, metadata_snapshot, context, max_block_size, files_info, metadata_snapshot->getColumnsForNames(column_names, getVirtuals(), getStorageID()))); } return Pipe::unitePipes(std::move(pipes)); From 9d6e2b4bad1c3151941bccbb275694c300b08a96 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sun, 28 Feb 2021 11:28:53 +0800 Subject: [PATCH 101/152] update comments --- src/Storages/StorageInMemoryMetadata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 914d64340f1..6f5c49af4ac 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -153,9 +153,9 @@ struct StorageInMemoryMetadata /// message. Block getSampleBlockForColumns( const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; - /// Virtual columns not included. can be used in the InputFormat for reading for example - Block getSampleBlockForColumns( - const Names & column_names) const; + /// Get the intersection between real columns in Storage and the given columns, no virtual columns included. + /// can be used in the InputFormat for reading data from corresponding storage. + Block getSampleBlockForColumns(const Names & column_names) const; /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. 
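To make the behaviour of these helpers concrete, here is a rough standalone sketch using plain standard-library types instead of ClickHouse's ColumnsDescription and Block (all names below are illustrative, not the real API). The idea: build a name-to-type map from the table's real columns, let virtual columns claim only the names that are not already taken (emplace() does not overwrite, so an ordinary column shadows a virtual one of the same name, matching the "user can override them" comment above), then resolve the requested names in order and fail loudly on unknown ones.

    #include <map>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    /// Hypothetical simplification: a column is just (name, type name).
    using NameToType = std::map<std::string, std::string>;
    using Column = std::pair<std::string, std::string>;

    std::vector<Column> resolveColumns(
        const NameToType & real_columns,
        const NameToType & virtual_columns,
        const std::vector<std::string> & requested)
    {
        NameToType lookup = real_columns;

        /// emplace() keeps the existing entry, so real columns win over
        /// virtual ones with the same name.
        for (const auto & [name, type] : virtual_columns)
            lookup.emplace(name, type);

        std::vector<Column> result;
        for (const auto & name : requested)
        {
            auto it = lookup.find(name);
            if (it == lookup.end())
                throw std::runtime_error("Column " + name + " not found in table");
            result.emplace_back(it->first, it->second);
        }
        return result;
    }

The std::runtime_error here plays the role of NOT_FOUND_COLUMN_IN_BLOCK in the real code; keeping the check in the lookup, rather than silently skipping unknown names as the second getSampleBlockForColumns overload above does, is what lets the caller trust that the returned header matches the requested column list.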
From 4eaf718fd2f351648f97f65e6766a014d96ecc39 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sun, 28 Feb 2021 23:35:38 +0800 Subject: [PATCH 102/152] fix getColumnsForNames() to bring the whole column info from metadata --- src/Storages/StorageInMemoryMetadata.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 10c5a9f26e2..ee0f442a865 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -110,23 +110,21 @@ const ColumnsDescription StorageInMemoryMetadata::getColumnsForNames( const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const { ColumnsDescription res; - std::unordered_map columns_map; + std::unordered_map columns_map; - NamesAndTypesList all_columns = getColumns().getAll(); - for (const auto & elem : all_columns) - columns_map.emplace(elem.name, elem.type); + for (const auto & column : columns) + columns_map.emplace(column.name, column); - /// Virtual columns must be appended after ordinary, because user can - /// override them. + /// Virtual columns also included. for (const auto & column : virtuals) - columns_map.emplace(column.name, column.type); + columns_map.emplace(column.name, ColumnDescription(column.name, column.type)); for (const auto & name : column_names) { auto it = columns_map.find(name); if (it != columns_map.end()) { - res.add(ColumnDescription(it->first, it->second)); + res.add(it->second); } else { From 139bb678a9909280786feb4be2a16d5b3f910946 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Mon, 1 Mar 2021 22:11:25 +0800 Subject: [PATCH 103/152] Branch on whether the Format isColumnOriented() or not --- src/Storages/StorageFile.cpp | 31 +++++++++++++++++++++++++++---- src/Storages/StorageFile.h | 3 +++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index cfa78d92b0f..c0b51c71f41 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -243,6 +243,20 @@ public: using FilesInfoPtr = std::shared_ptr; + static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) + { + auto header = metadata_snapshot->getSampleBlock(); + + /// Note: AddingDefaultsBlockInputStream doesn't change header. + + if (need_path_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + if (need_file_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + + return header; + } + StorageFileSource( std::shared_ptr storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -250,7 +264,8 @@ public: UInt64 max_block_size_, FilesInfoPtr files_info_, ColumnsDescription columns_description_) - : SourceWithProgress(metadata_snapshot_->getSampleBlockForColumns(columns_description_.getNamesOfPhysical(), storage_->getVirtuals(), storage_->getStorageID())) + : SourceWithProgress(storage_->isColumnOriented() ?
metadata_snapshot_->getSampleBlockForColumns(columns_description_.getNamesOfPhysical(), storage_->getVirtuals(), storage_->getStorageID()) + : getHeader(metadata_snapshot_, files_info_->need_path_column, files_info_->need_file_column)) , storage(std::move(storage_)) , metadata_snapshot(metadata_snapshot_) , files_info(std::move(files_info_)) @@ -331,8 +346,8 @@ public: read_buf = wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method); auto format = FormatFactory::instance().getInput(storage->format_name, *read_buf, - metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()), - context, max_block_size, storage->format_settings); + storage->isColumnOriented() ? metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()) + : metadata_snapshot->getSampleBlock(), context, max_block_size, storage->format_settings); reader = std::make_shared(format); @@ -399,6 +414,12 @@ private: std::unique_lock unique_lock; }; +bool StorageFile::isColumnOriented() const +{ + if (format_name == "Parquet" || format_name == "Arrow" || format_name == "Native" || format_name == "ORC") + return true; + return false; +}; Pipe StorageFile::read( const Names & column_names, @@ -445,7 +466,9 @@ Pipe StorageFile::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - this_ptr, metadata_snapshot, context, max_block_size, files_info, metadata_snapshot->getColumnsForNames(column_names, getVirtuals(), getStorageID()))); + this_ptr, metadata_snapshot, context, max_block_size, files_info, + isColumnOriented() ? metadata_snapshot->getColumnsForNames(column_names, getVirtuals(), getStorageID()) + : metadata_snapshot->getColumns())); } return Pipe::unitePipes(std::move(pipes)); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index c316412f808..3cccada5a26 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -64,6 +64,9 @@ public: static Strings getPathsList(const String & table_path, const String & user_files_path, const Context & context); + /// Check if the format is column-oriented + bool isColumnOriented() const; + protected: friend class StorageFileSource; friend class StorageFileBlockOutputStream; From b95f54edce10e5f099af92c4f6bc90584022b2ab Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 2 Mar 2021 16:25:17 +0800 Subject: [PATCH 104/152] Small fix --- src/Storages/StorageFile.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index c0b51c71f41..8c6a27a5694 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -416,9 +416,7 @@ private: bool StorageFile::isColumnOriented() const { - if (format_name == "Parquet" || format_name == "Arrow" || format_name == "Native" || format_name == "ORC") - return true; - return false; + return (format_name == "Parquet" || format_name == "Arrow" || format_name == "Native" || format_name == "ORC"); }; Pipe StorageFile::read( From 3d3923b5180f74e9ab073950c16c57ca87a53e89 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 2 Mar 2021 16:42:50 +0800 Subject: [PATCH 105/152] update comments --- src/Storages/StorageFile.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 3cccada5a26..a44ca5d6233 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -64,7 +64,9 @@ public: static Strings getPathsList(const String & table_path, const String & 
user_files_path, const Context & context); - /// Check if the format is column-oriented + /// Check if the format is column-oriented. Maybe it's more appropriate to put one flag in this class's private fields + /// to identify this format type. We can set it in new formats' register code which will be automatically set at initialization. + /// This will avoid updating this function frequently. bool isColumnOriented() const; protected: From 94c8e7af1a251b009ed6f8a4d5af440c4f4af297 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Wed, 3 Mar 2021 10:11:50 +0800 Subject: [PATCH 106/152] remove const for value-copy-return --- src/Storages/StorageInMemoryMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index ee0f442a865..daec3aa2b71 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -106,7 +106,7 @@ const ColumnsDescription & StorageInMemoryMetadata::getColumns() const return columns; } -const ColumnsDescription StorageInMemoryMetadata::getColumnsForNames( +ColumnsDescription StorageInMemoryMetadata::getColumnsForNames( const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const { ColumnsDescription res; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 6f5c49af4ac..e55e9d1cbe9 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -87,7 +87,7 @@ struct StorageInMemoryMetadata const ColumnsDescription & getColumns() const; /// Returns combined set of columns from give column Names - const ColumnsDescription getColumnsForNames( + ColumnsDescription getColumnsForNames( const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; From 1a4961499b39c1fa1c5df5f11e75999af5fb1efd Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 4 Mar 2021 17:52:22 +0800 Subject: [PATCH 107/152] the Native format is not supported after test --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8c6a27a5694..57c3f39d35a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -416,7 +416,7 @@ private: bool StorageFile::isColumnOriented() const { - return (format_name == "Parquet" || format_name == "Arrow" || format_name == "Native" || format_name == "ORC"); + return (format_name == "Parquet" || format_name == "Arrow" || format_name == "ORC"); }; Pipe StorageFile::read( From 282775b10c994d188e252a3c950664eeed2ee3b2 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Tue, 30 Mar 2021 20:57:21 +0300 Subject: [PATCH 108/152] save --- src/Storages/StorageFile.cpp | 58 ++++++++++++++++++------ src/Storages/StorageInMemoryMetadata.cpp | 36 ++------------- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 57c3f39d35a..6ae63ec3544 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -22,6 +22,9 @@ #include #include #include +#include "Storages/ColumnsDescription.h" +#include "Storages/StorageInMemoryMetadata.h" +#include "Storages/StorageMergeTree.h" #include #include @@ -227,6 +230,8 @@ static std::chrono::seconds getLockTimeout(const Context & context) return std::chrono::seconds{lock_timeout}; } +using 
StorageFilePtr = std::shared_ptr; + class StorageFileSource : public SourceWithProgress { @@ -257,6 +262,19 @@ public: return header; } + static Block getBlockForSource( + const StorageFilePtr & storage, + const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, + const FilesInfoPtr & files_info) + { + if (FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) { + return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); + } else { + return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); + } + } + StorageFileSource( std::shared_ptr storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -264,8 +282,7 @@ public: UInt64 max_block_size_, FilesInfoPtr files_info_, ColumnsDescription columns_description_) - : SourceWithProgress(storage_->isColumnOriented() ? metadata_snapshot_->getSampleBlockForColumns(columns_description_.getNamesOfPhysical(), storage_->getVirtuals(), storage_->getStorageID()) - : getHeader(metadata_snapshot_, files_info_->need_path_column, files_info_->need_file_column)) + : SourceWithProgress(getBlockForSource(storage_, metadata_snapshot_, columns_description_, files_info_)) , storage(std::move(storage_)) , metadata_snapshot(metadata_snapshot_) , files_info(std::move(files_info_)) @@ -345,9 +362,16 @@ public: } read_buf = wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method); - auto format = FormatFactory::instance().getInput(storage->format_name, *read_buf, - storage->isColumnOriented() ? metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()) - : metadata_snapshot->getSampleBlock(), context, max_block_size, storage->format_settings); + + auto get_block_for_format = [&]() -> Block + { + if (FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) + return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + return metadata_snapshot->getSampleBlock(); + }; + + auto format = FormatFactory::instance().getInput( + storage->format_name, *read_buf, get_block_for_format(), context, max_block_size, storage->format_settings); reader = std::make_shared(format); @@ -414,11 +438,6 @@ private: std::unique_lock unique_lock; }; -bool StorageFile::isColumnOriented() const -{ - return (format_name == "Parquet" || format_name == "Arrow" || format_name == "ORC"); -}; - Pipe StorageFile::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -428,6 +447,12 @@ Pipe StorageFile::read( size_t max_block_size, unsigned num_streams) { + + std::cout << "Names" << std::endl; + for (const auto & name : column_names) { + std::cout << name << std::endl; + } + std::cout << "-----" << std::endl; BlockInputStreams blocks_input; if (use_table_fd) /// need to call ctr BlockInputStream @@ -463,10 +488,15 @@ Pipe StorageFile::read( for (size_t i = 0; i < num_streams; ++i) { + const auto get_columns_for_format = [&]() -> ColumnsDescription + { + if (FormatFactory::instance().checkIfFormatIsColumnOriented(format_name)) + return metadata_snapshot->getColumnsForNames(column_names, getVirtuals(), getStorageID()); + else + return metadata_snapshot->getColumns(); + }; pipes.emplace_back(std::make_shared( - this_ptr, metadata_snapshot, context, max_block_size, files_info, - isColumnOriented() ? 
metadata_snapshot->getColumnsForNames(column_names, getVirtuals(), getStorageID()) - : metadata_snapshot->getColumns())); + this_ptr, metadata_snapshot, context, max_block_size, files_info, get_columns_for_format())); } return Pipe::unitePipes(std::move(pipes)); @@ -754,4 +784,4 @@ NamesAndTypesList StorageFile::getVirtuals() const {"_file", std::make_shared()} }; } -} +} \ No newline at end of file diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index daec3aa2b71..14d80708c8c 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -322,9 +322,10 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( { Block res; - std::unordered_map columns_map; - auto all_columns = getColumns().getAllWithSubcolumns(); + std::unordered_map columns_map; + columns_map.reserve(all_columns.size()); + for (const auto & elem : all_columns) columns_map.emplace(elem.name, elem.type); @@ -337,43 +338,16 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( { auto it = columns_map.find(name); if (it != columns_map.end()) - { res.insert({it->second->createColumn(), it->second, it->first}); - } else - { throw Exception( - "Column " + backQuote(name) + " not found in table " + storage_id.getNameForLogs(), + "Column " + backQuote(name) + " not found in table " + (storage_id.empty() ? "" : storage_id.getNameForLogs()), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - } } return res; } -Block StorageInMemoryMetadata::getSampleBlockForColumns( - const Names & column_names) const -{ - Block res; - - std::unordered_map columns_map; - - NamesAndTypesList all_columns = getColumns().getAll(); - for (const auto & elem : all_columns) - columns_map.emplace(elem.name, elem.type); - - for (const auto & name : column_names) - { - auto it = columns_map.find(name); - if (it != columns_map.end()) - { - res.insert({it->second->createColumn(), it->second, it->first}); - } - } - return res; -} - - const KeyDescription & StorageInMemoryMetadata::getPartitionKey() const { return partition_key; @@ -661,4 +635,4 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const } -} +} \ No newline at end of file From 37f48d13b4a224d240a1842a28f66a89e0e69217 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 31 Mar 2021 00:25:37 +0300 Subject: [PATCH 109/152] add test --- src/Formats/FormatFactory.cpp | 16 ++++++++++++++- src/Formats/FormatFactory.h | 4 ++++ .../Formats/Impl/ArrowBlockInputFormat.cpp | 4 ++-- .../Formats/Impl/ORCBlockInputFormat.cpp | 1 + .../Formats/Impl/ParquetBlockInputFormat.cpp | 1 + src/Storages/StorageFile.cpp | 13 +++--------- src/Storages/StorageInMemoryMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.h | 5 +---- .../00163_column_oriented_formats.reference | 12 +++++++++++ .../00163_column_oriented_formats.sh | 20 +++++++++++++++++++ 10 files changed, 60 insertions(+), 18 deletions(-) create mode 100644 tests/queries/1_stateful/00163_column_oriented_formats.reference create mode 100755 tests/queries/1_stateful/00163_column_oriented_formats.sh diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f7f32cf9b6f..76b0af0ed63 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -412,11 +412,25 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na { auto & target = dict[name].supports_parallel_formatting; if (target) - throw Exception("FormatFactory: Output format " + name + " is already marked as supporting 
parallel formatting.", ErrorCodes::LOGICAL_ERROR); + throw Exception("FormatFactory: Output format " + name + " is already marked as supporting parallel formatting", ErrorCodes::LOGICAL_ERROR); target = true; } +void FormatFactory::markFormatAsColumnOriented(const String & name) +{ + auto & target = dict[name].is_column_oriented; + if (target) + throw Exception("FormatFactory: Format " + name + " is already marked as column oriented", ErrorCodes::LOGICAL_ERROR); +} + + +bool FormatFactory::checkIfFormatIsColumnOriented(const String & name) +{ + const auto & target = getCreators(name); + return target.is_column_oriented; +} + FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 4fa7e9a0c01..3a21ef91d35 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -101,6 +101,7 @@ private: OutputProcessorCreator output_processor_creator; FileSegmentationEngine file_segmentation_engine; bool supports_parallel_formatting{false}; + bool is_column_oriented{false}; }; using FormatsDictionary = std::unordered_map; @@ -155,6 +156,9 @@ public: void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator); void markOutputFormatSupportsParallelFormatting(const String & name); + void markFormatAsColumnOriented(const String & name); + + bool checkIfFormatIsColumnOriented(const String & name); const FormatsDictionary & getAllFormats() const { diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 4edef1f1365..6a5c9718278 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -101,7 +101,7 @@ void ArrowBlockInputFormat::prepareReader() record_batch_current = 0; } -void registerInputFormatProcessorArrow(FormatFactory &factory) +void registerInputFormatProcessorArrow(FormatFactory & factory) { factory.registerInputFormatProcessor( "Arrow", @@ -112,7 +112,7 @@ void registerInputFormatProcessorArrow(FormatFactory &factory) { return std::make_shared(buf, sample, false); }); - + factory.markFormatAsColumnOriented("Arrow"); factory.registerInputFormatProcessor( "ArrowStream", [](ReadBuffer & buf, diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 7776a904f1c..9af03e93c32 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -64,6 +64,7 @@ void registerInputFormatProcessorORC(FormatFactory &factory) { return std::make_shared(buf, sample); }); + factory.markFormatAsColumnOriented("ORC"); } } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index bb55c71b7ca..162185e75b8 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -94,6 +94,7 @@ void registerInputFormatProcessorParquet(FormatFactory &factory) { return std::make_shared(buf, sample); }); + factory.markFormatAsColumnOriented("Parquet"); } } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6ae63ec3544..36da091bcea 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -268,11 +268,10 @@ public: const ColumnsDescription & columns_description, const FilesInfoPtr & files_info) { - if 
(FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) { + if (FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); - } else { + else return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); - } } StorageFileSource( @@ -447,12 +446,6 @@ Pipe StorageFile::read( size_t max_block_size, unsigned num_streams) { - - std::cout << "Names" << std::endl; - for (const auto & name : column_names) { - std::cout << name << std::endl; - } - std::cout << "-----" << std::endl; BlockInputStreams blocks_input; if (use_table_fd) /// need to call ctr BlockInputStream @@ -784,4 +777,4 @@ NamesAndTypesList StorageFile::getVirtuals() const {"_file", std::make_shared()} }; } -} \ No newline at end of file +} diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 14d80708c8c..aa427928b1d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -635,4 +635,4 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const } -} \ No newline at end of file +} diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index e55e9d1cbe9..4eb097f6c7b 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -152,10 +152,7 @@ struct StorageInMemoryMetadata /// Storage metadata. StorageID required only for more clear exception /// message. Block getSampleBlockForColumns( - const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; - /// Get the intersection between real columns in Storage and the given columns, no virtual columns included. - /// can be used in the InputFormat for reading data from corresponding storage. - Block getSampleBlockForColumns(const Names & column_names) const; + const Names & column_names, const NamesAndTypesList & virtuals = {}, const StorageID & storage_id = StorageID::createEmpty()) const; /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. diff --git a/tests/queries/1_stateful/00163_column_oriented_formats.reference b/tests/queries/1_stateful/00163_column_oriented_formats.reference new file mode 100644 index 00000000000..cb20aca4392 --- /dev/null +++ b/tests/queries/1_stateful/00163_column_oriented_formats.reference @@ -0,0 +1,12 @@ +Parquet +6b397d4643bc1f920f3eb8aa87ee180c - +7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - +d8746733270cbeff7ab3550c9b944fb6 - +Arrow +6b397d4643bc1f920f3eb8aa87ee180c - +7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - +d8746733270cbeff7ab3550c9b944fb6 - +ORC +6b397d4643bc1f920f3eb8aa87ee180c - +7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - +d8746733270cbeff7ab3550c9b944fb6 - diff --git a/tests/queries/1_stateful/00163_column_oriented_formats.sh b/tests/queries/1_stateful/00163_column_oriented_formats.sh new file mode 100755 index 00000000000..1363ccf3c00 --- /dev/null +++ b/tests/queries/1_stateful/00163_column_oriented_formats.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +FORMATS=('Parquet' 'Arrow' 'ORC') + +for format in "${FORMATS[@]}" +do + echo $format + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 00163_column_oriented SYNC" + $CLICKHOUSE_CLIENT -q "CREATE TABLE 00163_column_oriented(ClientEventTime DateTime, MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File($format)" + $CLICKHOUSE_CLIENT -q "INSERT INTO 00163_column_oriented SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM test.hits ORDER BY ClientEventTime, MobilePhoneModel, ClientIP6 LIMIT 100" + $CLICKHOUSE_CLIENT -q "SELECT ClientEventTime from 00163_column_oriented" | md5sum + $CLICKHOUSE_CLIENT -q "SELECT MobilePhoneModel from 00163_column_oriented" | md5sum + $CLICKHOUSE_CLIENT -q "SELECT ClientIP6 from 00163_column_oriented" | md5sum + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 00163_column_oriented SYNC" +done From af85ce2fd1d522ab506c5424604e4daeac728818 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 31 Mar 2021 00:28:23 +0300 Subject: [PATCH 110/152] better --- src/Storages/StorageFile.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 36da091bcea..c89e5447af3 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -22,9 +22,8 @@ #include #include #include -#include "Storages/ColumnsDescription.h" -#include "Storages/StorageInMemoryMetadata.h" -#include "Storages/StorageMergeTree.h" +#include +#include #include #include From 71a2c85d9ad63c83747dd224028343b139fb35bc Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Wed, 31 Mar 2021 17:21:19 +0300 Subject: [PATCH 111/152] better --- src/Storages/StorageFile.cpp | 14 ++++++++--- src/Storages/StorageFile.h | 7 +++--- src/Storages/StorageInMemoryMetadata.cpp | 31 ------------------------ src/Storages/StorageInMemoryMetadata.h | 7 +----- 4 files changed, 15 insertions(+), 44 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index c89e5447af3..56f08802bc4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -151,6 +151,11 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user return paths; } +bool StorageFile::isColumnOriented() const +{ + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); +} + StorageFile::StorageFile(int table_fd_, CommonArguments args) : StorageFile(args) { @@ -267,7 +272,7 @@ public: const ColumnsDescription & columns_description, const FilesInfoPtr & files_info) { - if (FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) + if (storage->isColumnOriented()) return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); else return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); @@ -363,7 +368,7 @@ public: auto get_block_for_format = [&]() -> Block { - if (FormatFactory::instance().checkIfFormatIsColumnOriented(storage->format_name)) + if (storage->isColumnOriented()) return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); return metadata_snapshot->getSampleBlock(); }; @@ -482,8 +487,9 @@ Pipe StorageFile::read( { const auto get_columns_for_format = [&]() -> ColumnsDescription { - if (FormatFactory::instance().checkIfFormatIsColumnOriented(format_name)) - return metadata_snapshot->getColumnsForNames(column_names, 
getVirtuals(), getStorageID()); + if (isColumnOriented()) + return ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; else return metadata_snapshot->getColumns(); }; diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index a44ca5d6233..27f7419321c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -64,9 +64,10 @@ public: static Strings getPathsList(const String & table_path, const String & user_files_path, const Context & context); - /// Check if the format is column-oriented. Maybe it's more appropriate to put one flag in this class's private fields - /// to identify this format type. We can set it in new formats' register code which will be automatically set at initialization. - /// This will avoid updating this function frequently. + /// Check if the format is column-oriented. + /// Is is useful because column oriented formats could effectively skip unknown columns + /// So we can create a header of only required columns in read method and ask + /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. bool isColumnOriented() const; protected: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index aa427928b1d..2f4a24a5c60 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -106,37 +106,6 @@ const ColumnsDescription & StorageInMemoryMetadata::getColumns() const return columns; } -ColumnsDescription StorageInMemoryMetadata::getColumnsForNames( - const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const -{ - ColumnsDescription res; - std::unordered_map columns_map; - - for (const auto & column : columns) - columns_map.emplace(column.name, column); - - /// Virtual columns also included. 
- for (const auto & column : virtuals) - columns_map.emplace(column.name, ColumnDescription(column.name, column.type)); - - for (const auto & name : column_names) - { - auto it = columns_map.find(name); - if (it != columns_map.end()) - { - res.add(it->second); - } - else - { - throw Exception( - "Column " + backQuote(name) + " not found in table " + storage_id.getNameForLogs(), - ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - } - } - - return res; -} - const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const { return secondary_indices; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4eb097f6c7b..00fb944c0b5 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -86,14 +86,9 @@ struct StorageInMemoryMetadata /// Returns combined set of columns const ColumnsDescription & getColumns() const; - /// Returns combined set of columns from give column Names - ColumnsDescription getColumnsForNames( - const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; - - /// Returns secondary indices - const IndicesDescription & getSecondaryIndices() const; + /// Has at least one non primary index bool hasSecondaryIndices() const; From 5d672d45298b47c7988b1e48f02b74ef37780806 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Apr 2021 03:08:02 +0300 Subject: [PATCH 112/152] Update FormatFactory.cpp --- src/Formats/FormatFactory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 76b0af0ed63..0591fe003fa 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -422,6 +422,7 @@ void FormatFactory::markFormatAsColumnOriented(const String & name) auto & target = dict[name].is_column_oriented; if (target) throw Exception("FormatFactory: Format " + name + " is already marked as column oriented", ErrorCodes::LOGICAL_ERROR); + target = true; } From 7a318d25aebf49122ee7ddc00d0458e31585b233 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 1 Apr 2021 15:48:31 +0300 Subject: [PATCH 113/152] add perf test --- tests/performance/parse_engine_file.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/performance/parse_engine_file.xml b/tests/performance/parse_engine_file.xml index d0226c3bb68..3f402d6abfa 100644 --- a/tests/performance/parse_engine_file.xml +++ b/tests/performance/parse_engine_file.xml @@ -22,6 +22,9 @@ Native Avro MsgPack + Parquet + Arrow + ORC @@ -32,6 +35,8 @@ SELECT * FROM table_{format} FORMAT Null +SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM table_{format} FORMAT Null + DROP TABLE IF EXISTS table_{format} From f3f550e81192f9260071bfee504ac1ccaac08587 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 2 Apr 2021 16:51:51 +0300 Subject: [PATCH 114/152] better perf test --- tests/performance/parse_engine_file.xml | 5 ---- .../parse_engine_file_column_oriented.xml | 28 +++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 tests/performance/parse_engine_file_column_oriented.xml diff --git a/tests/performance/parse_engine_file.xml b/tests/performance/parse_engine_file.xml index 3f402d6abfa..d0226c3bb68 100644 --- a/tests/performance/parse_engine_file.xml +++ b/tests/performance/parse_engine_file.xml @@ -22,9 +22,6 @@ Native Avro MsgPack - Parquet - Arrow - ORC @@ -35,8 +32,6 @@ SELECT * FROM table_{format} FORMAT Null -SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM 
table_{format} FORMAT Null - DROP TABLE IF EXISTS table_{format} diff --git a/tests/performance/parse_engine_file_column_oriented.xml b/tests/performance/parse_engine_file_column_oriented.xml new file mode 100644 index 00000000000..874572cfe5e --- /dev/null +++ b/tests/performance/parse_engine_file_column_oriented.xml @@ -0,0 +1,28 @@ + + + + test.hits + + + + + format + + Parquet + Arrow + ORC + + + + +CREATE TABLE IF NOT EXISTS table_{format}(ClientEventTime DateTime, MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File({format}) + +INSERT INTO table_{format} SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM test.hits ORDER BY ClientEventTime, MobilePhoneModel, ClientIP6 LIMIT 100000 + +SELECT ClientEventTime FROM table_{format} FORMAT Null +SELECT MobilePhoneModel FROM table_{format} FORMAT Null +SELECT ClientIP6 FROM table_{format} FORMAT Null + +DROP TABLE IF EXISTS table_{format} + + From 0d96ed99b7862020f1f55af03f417f784038ed7d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 5 Apr 2021 15:56:31 +0300 Subject: [PATCH 115/152] update_perf_test --- .../parse_engine_file_column_oriented.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/performance/parse_engine_file_column_oriented.xml b/tests/performance/parse_engine_file_column_oriented.xml index 874572cfe5e..d3f7faa0c1a 100644 --- a/tests/performance/parse_engine_file_column_oriented.xml +++ b/tests/performance/parse_engine_file_column_oriented.xml @@ -14,14 +14,14 @@ - -CREATE TABLE IF NOT EXISTS table_{format}(ClientEventTime DateTime, MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File({format}) -INSERT INTO table_{format} SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM test.hits ORDER BY ClientEventTime, MobilePhoneModel, ClientIP6 LIMIT 100000 -SELECT ClientEventTime FROM table_{format} FORMAT Null -SELECT MobilePhoneModel FROM table_{format} FORMAT Null -SELECT ClientIP6 FROM table_{format} FORMAT Null + +CREATE TABLE IF NOT EXISTS table_{format} ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, 
SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = File({format}) + +INSERT INTO table_{format} SELECT WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, ClientIP6, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, URLDomain, RefererDomain, Refresh, IsRobot, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, UTCEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, RemoteIP6, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, RedirectTiming, DOMInteractiveTiming, DOMContentLoadedTiming, DOMCompleteTiming, LoadEventStartTiming, LoadEventEndTiming, NSToDOMContentLoadedTiming, FirstPaintTiming, RedirectCount, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID, YCLID, ShareService, ShareURL, ShareTitle, IslandID, RequestNum, RequestTry FROM test.hits LIMIT 100000 + +SELECT ClientEventTime, MobilePhoneModel, ClientIP6, Sex, SearchEngineID, SocialNetwork FROM table_{format} FORMAT Null DROP TABLE IF EXISTS table_{format} From b87af2c4a8f1509097ef3108e7f2b60dc347473c Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Wed, 7 Apr 2021 05:31:13 +0300 Subject: [PATCH 116/152] Fixed typos --- docs/en/sql-reference/data-types/datetime64.md | 2 +- docs/ru/sql-reference/data-types/datetime64.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 32cdbb8aaa9..1d3725b9fb3 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -17,7 +17,7 @@ DateTime64(precision, [timezone]) Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as 
Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md). -Supported range from January 1, 1925 till December 31, 2238. +Supported range from January 1, 1925 till December 31, 2283. ## Examples {#examples} diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index ae478045e73..3a08da75bb7 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -17,7 +17,7 @@ DateTime64(precision, [timezone]) Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (‘2020-01-01 05:00:01.000’). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md). -Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2238 г. +Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2283 г. ## Примеры {#examples} From 05eeec16c1842a57396f564795e73131e920006c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 11:49:10 +0300 Subject: [PATCH 117/152] Fix potential segfault on Keeper startup --- src/Coordination/KeeperServer.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 5d08c2825f5..3f99e9728ad 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -39,7 +39,6 @@ KeeperServer::KeeperServer( void KeeperServer::startup() { - state_machine->init(); state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); @@ -73,15 +72,22 @@ void KeeperServer::startup() nuraft::asio_service::options asio_opts{}; nuraft::raft_server::init_options init_options; + init_options.skip_initial_election_timeout_ = state_manager->shouldStartAsFollower(); init_options.raft_callback_ = [this] (nuraft::cb_func::Type type, nuraft::cb_func::Param * param) { return callbackFunc(type, param); }; - raft_instance = launcher.init( - state_machine, state_manager, nuraft::cs_new("RaftInstance", coordination_settings->raft_logs_level), state_manager->getPort(), - asio_opts, params, init_options); + { + /// We use this lock here because NuRaft start background threads in + /// raft_server constructor. These threads may call raft_callback + /// (callbackFunc) before raft_instance object fully constructed. 
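For illustration, a minimal sketch of the race this lock closes, using only standard C++; the class and member names below are invented for the example and are not NuRaft or ClickHouse APIs:

#include <iostream>
#include <mutex>
#include <thread>

class Server
{
public:
    Server()
    {
        /// Hold the same mutex the callback takes, for the whole constructor body.
        std::lock_guard<std::mutex> lock(init_mutex);
        worker = std::thread([this] { callback(); });   /// may fire immediately
        fully_constructed = true;                       /// published under the lock
    }

    ~Server() { worker.join(); }

private:
    void callback()
    {
        /// Blocks until the constructor releases init_mutex.
        std::lock_guard<std::mutex> lock(init_mutex);
        std::cout << "constructed: " << std::boolalpha << fully_constructed << '\n';
    }

    std::mutex init_mutex;
    bool fully_constructed = false;
    std::thread worker;
};

int main()
{
    Server server;   /// prints "constructed: true"; without the lock the callback could run first
}

Without the guard, the background thread could observe a half-built object, which matches the startup failure mode the commit message describes.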
+ std::lock_guard lock(initialized_mutex); + raft_instance = launcher.init( + state_machine, state_manager, nuraft::cs_new("RaftInstance", coordination_settings->raft_logs_level), state_manager->getPort(), + asio_opts, params, init_options); + } if (!raft_instance) throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance"); @@ -201,9 +207,12 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ if (next_index < last_commited || next_index - last_commited <= 1) commited_store = true; + /// We use this lock here because NuRaft starts background threads in + /// raft_server constructor. So this callback can be called before + /// raft_instance object fully initialized. This lock allows to avoid this. + std::unique_lock lock(initialized_mutex); auto set_initialized = [this] () { - std::unique_lock lock(initialized_mutex); initialized_flag = true; initialized_cv.notify_all(); }; From 1364d939a19738678adaad3269a4ad5052a5af5d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 7 Apr 2021 17:04:35 +0800 Subject: [PATCH 118/152] Avoid using harmful function rand() --- contrib/librdkafka-cmake/config.h.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/librdkafka-cmake/config.h.in b/contrib/librdkafka-cmake/config.h.in index 80b6ea61b6e..b3450c40a60 100644 --- a/contrib/librdkafka-cmake/config.h.in +++ b/contrib/librdkafka-cmake/config.h.in @@ -75,6 +75,8 @@ #define HAVE_STRNDUP 1 // strerror_r #define HAVE_STRERROR_R 1 +// rand_r +#define HAVE_RAND_R 1 #ifdef __APPLE__ // pthread_setname_np From a020b357f08393d3dd1d39c124e91e99a1c06b81 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Apr 2021 12:47:25 +0300 Subject: [PATCH 119/152] Update version_date.tsv after release 21.3.5.42 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f6fd4a80c1b..3e030c14bb0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v21.3.5.42-lts 2021-04-07 v21.3.4.25-lts 2021-03-28 v21.3.3.14-lts 2021-03-19 v21.3.2.5-lts 2021-03-12 From 36c0e601a942f331959a25ce6e1a73aec6bb0dc1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 13:18:07 +0300 Subject: [PATCH 120/152] Better non-dirty fix --- contrib/NuRaft | 2 +- src/Coordination/KeeperServer.cpp | 93 ++++++++++++++++++++++++------- src/Coordination/KeeperServer.h | 17 +++++- 3 files changed, 89 insertions(+), 23 deletions(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 241fd3754a1..c35819f2c8a 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 241fd3754a1eb4d82ab68a9a875dc99391ec9f02 +Subproject commit c35819f2c8a378d4ba88cc930c17bc20aeb875eb diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 3f99e9728ad..54407acb32f 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -32,9 +32,10 @@ KeeperServer::KeeperServer( coordination_settings)) , state_manager(nuraft::cs_new(server_id, "keeper_server", config, coordination_settings)) , responses_queue(responses_queue_) + , log(&Poco::Logger::get("KeeperServer")) { if (coordination_settings->quorum_reads) - LOG_WARNING(&Poco::Logger::get("KeeperServer"), "Quorum reads enabled, Keeper will work slower."); + LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); } void KeeperServer::startup() @@ -74,32 +75,87 @@ void KeeperServer::startup() 
nuraft::raft_server::init_options init_options; init_options.skip_initial_election_timeout_ = state_manager->shouldStartAsFollower(); + init_options.start_server_in_constructor_ = false; init_options.raft_callback_ = [this] (nuraft::cb_func::Type type, nuraft::cb_func::Param * param) { return callbackFunc(type, param); }; - { - /// We use this lock here because NuRaft start background threads in - /// raft_server constructor. These threads may call raft_callback - /// (callbackFunc) before raft_instance object fully constructed. - std::lock_guard lock(initialized_mutex); - raft_instance = launcher.init( - state_machine, state_manager, nuraft::cs_new("RaftInstance", coordination_settings->raft_logs_level), state_manager->getPort(), - asio_opts, params, init_options); - } + launchRaftServer(params, asio_opts, init_options); if (!raft_instance) throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance"); } +void KeeperServer::launchRaftServer( + const nuraft::raft_params & params, + const nuraft::asio_service::options & asio_opts, + const nuraft::raft_server::init_options & init_opts) +{ + nuraft::ptr logger = nuraft::cs_new("RaftInstance", coordination_settings->raft_logs_level); + asio_service = nuraft::cs_new(asio_opts, logger); + asio_listener = asio_service->create_rpc_listener(state_manager->getPort(), logger); + + if (!asio_listener) + return; + + nuraft::ptr scheduler = asio_service; + nuraft::ptr rpc_cli_factory = asio_service; + + nuraft::ptr casted_state_manager = state_manager; + nuraft::ptr casted_state_machine = state_machine; + + /// raft_server creates unique_ptr from it + nuraft::context * ctx = new nuraft::context( + casted_state_manager, casted_state_machine, + asio_listener, logger, rpc_cli_factory, scheduler, params); + + raft_instance = nuraft::cs_new(ctx, init_opts); + + raft_instance->start_server(init_opts.skip_initial_election_timeout_); + asio_listener->listen(raft_instance); +} + +void KeeperServer::shutdownRaftServer() +{ + size_t timeout = coordination_settings->shutdown_timeout.totalSeconds(); + + if (!raft_instance) + { + LOG_INFO(log, "RAFT doesn't start, shutdown not required"); + return; + } + + raft_instance->shutdown(); + raft_instance.reset(); + + if (asio_listener) + { + asio_listener->stop(); + asio_listener->shutdown(); + } + + if (asio_service) + { + asio_service->stop(); + size_t count = 0; + while (asio_service->get_active_workers() != 0 && count < timeout * 100) + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + count++; + } + } + + if (asio_service->get_active_workers() != 0) + LOG_WARNING(log, "Failed to shutdown RAFT server in {} seconds", timeout); +} + + void KeeperServer::shutdown() { state_machine->shutdownStorage(); state_manager->flushLogStore(); - auto timeout = coordination_settings->shutdown_timeout.totalSeconds(); - if (!launcher.shutdown(timeout)) - LOG_WARNING(&Poco::Logger::get("KeeperServer"), "Failed to shutdown RAFT server in {} seconds", timeout); + shutdownRaftServer(); } namespace @@ -196,7 +252,7 @@ bool KeeperServer::isLeaderAlive() const return raft_instance->is_leader_alive(); } -nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */) +nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param) { if (initialized_flag) return nuraft::cb_func::ReturnCode::Ok; @@ -207,12 +263,9 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ if (next_index < 
last_commited || next_index - last_commited <= 1) commited_store = true; - /// We use this lock here because NuRaft starts background threads in - /// raft_server constructor. So this callback can be called before - /// raft_instance object fully initialized. This lock allows to avoid this. - std::unique_lock lock(initialized_mutex); auto set_initialized = [this] () { + std::unique_lock lock(initialized_mutex); initialized_flag = true; initialized_cv.notify_all(); }; @@ -229,7 +282,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ case nuraft::cb_func::BecomeFollower: case nuraft::cb_func::GotAppendEntryReqFromLeader: { - if (isLeaderAlive()) + if (param->leaderId != -1) { auto leader_index = raft_instance->get_leader_committed_log_idx(); auto our_index = raft_instance->get_committed_log_idx(); @@ -249,7 +302,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ } case nuraft::cb_func::InitialBatchCommited: { - if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests. + if (param->myId == param->leaderId) /// We have committed our log store and we are leader, ready to serve requests. set_initialized(); initial_batch_committed = true; return nuraft::cb_func::ReturnCode::Ok; diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 9a0260cda94..645d9242f93 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -22,9 +23,9 @@ private: nuraft::ptr state_manager; - nuraft::raft_launcher launcher; - nuraft::ptr raft_instance; + nuraft::ptr asio_service; + nuraft::ptr asio_listener; std::mutex append_entries_mutex; @@ -36,8 +37,20 @@ private: std::atomic initial_batch_committed = false; std::atomic active_session_id_requests = 0; + Poco::Logger * log; + nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param); + /// Almost copy-paste from nuraft::launcher, but with separated server init and start + /// Allows to avoid race conditions. 
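A hedged sketch of the two-phase bring-up this declaration separates out: construct everything with threads disabled, then start them once the object is fully usable. Options, RaftLike and startServer are invented stand-ins for the patch's start_server_in_constructor_ flag and start_server() call, not the real NuRaft interface:

#include <functional>
#include <iostream>
#include <thread>

struct Options { bool start_in_constructor = true; };

class RaftLike
{
public:
    RaftLike(const Options & opts, std::function<void()> callback_)
        : callback(std::move(callback_))
    {
        if (opts.start_in_constructor)
            startServer();   /// unsafe if the callback needs a fully built object
    }

    void startServer()       /// phase 2: safe, construction has already finished
    {
        worker = std::thread([this] { callback(); });
    }

    ~RaftLike()
    {
        if (worker.joinable())
            worker.join();
    }

private:
    std::function<void()> callback;
    std::thread worker;
};

int main()
{
    Options opts;
    opts.start_in_constructor = false;   /// mirrors init_options.start_server_in_constructor_ = false
    RaftLike server(opts, [] { std::cout << "callback after full construction\n"; });
    server.startServer();                /// explicit start, as in launchRaftServer()
}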
+ void launchRaftServer( + const nuraft::raft_params & params, + const nuraft::asio_service::options & asio_opts, + const nuraft::raft_server::init_options & init_opts); + + void shutdownRaftServer(); + + public: KeeperServer( int server_id_, From a1164a7e4c1e4224fb39360eecff4125baa2b4f1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 13:21:53 +0300 Subject: [PATCH 121/152] More consistent --- src/Coordination/KeeperServer.cpp | 26 +++++++++++++------------- src/Coordination/KeeperServer.h | 3 +-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 54407acb32f..478b3aa3d17 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -72,6 +72,17 @@ void KeeperServer::startup() params.return_method_ = nuraft::raft_params::blocking; nuraft::asio_service::options asio_opts{}; + + launchRaftServer(params, asio_opts); + + if (!raft_instance) + throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance"); +} + +void KeeperServer::launchRaftServer( + const nuraft::raft_params & params, + const nuraft::asio_service::options & asio_opts) +{ nuraft::raft_server::init_options init_options; init_options.skip_initial_election_timeout_ = state_manager->shouldStartAsFollower(); @@ -81,17 +92,6 @@ void KeeperServer::startup() return callbackFunc(type, param); }; - launchRaftServer(params, asio_opts, init_options); - - if (!raft_instance) - throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance"); -} - -void KeeperServer::launchRaftServer( - const nuraft::raft_params & params, - const nuraft::asio_service::options & asio_opts, - const nuraft::raft_server::init_options & init_opts) -{ nuraft::ptr logger = nuraft::cs_new("RaftInstance", coordination_settings->raft_logs_level); asio_service = nuraft::cs_new(asio_opts, logger); asio_listener = asio_service->create_rpc_listener(state_manager->getPort(), logger); @@ -110,9 +110,9 @@ void KeeperServer::launchRaftServer( casted_state_manager, casted_state_machine, asio_listener, logger, rpc_cli_factory, scheduler, params); - raft_instance = nuraft::cs_new(ctx, init_opts); + raft_instance = nuraft::cs_new(ctx, init_options); - raft_instance->start_server(init_opts.skip_initial_election_timeout_); + raft_instance->start_server(init_options.skip_initial_election_timeout_); asio_listener->listen(raft_instance); } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 645d9242f93..5af948305ef 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -45,8 +45,7 @@ private: /// Allows to avoid race conditions. 
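The shutdown side of the same lifecycle uses a bounded poll rather than an unbounded join. A standalone sketch under the assumption that only a worker counter is observable; active_workers stands in for asio_service->get_active_workers() and the names are illustrative:

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

std::atomic<int> active_workers{1};   /// stand-in for asio_service->get_active_workers()

bool waitForWorkersToStop(std::chrono::seconds timeout)
{
    const auto deadline = std::chrono::steady_clock::now() + timeout;
    while (active_workers.load() != 0)
    {
        if (std::chrono::steady_clock::now() >= deadline)
            return false;   /// caller logs "Failed to shutdown RAFT server in N seconds"
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    return true;
}

int main()
{
    std::thread worker([]
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        active_workers = 0;   /// worker drains its queue and exits
    });

    std::cout << (waitForWorkersToStop(std::chrono::seconds(5)) ? "clean shutdown" : "timed out") << '\n';
    worker.join();
}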
void launchRaftServer( const nuraft::raft_params & params, - const nuraft::asio_service::options & asio_opts, - const nuraft::raft_server::init_options & init_opts); + const nuraft::asio_service::options & asio_opts); void shutdownRaftServer(); From 19b147c7acb1ace46ab9b18c638b7dfa3083899e Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 7 Apr 2021 13:30:47 +0300 Subject: [PATCH 122/152] Allow more replica change events in hedged tests due to high server load during testing --- tests/integration/test_hedged_requests/test.py | 5 ++++- tests/integration/test_hedged_requests_parallel/test.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index a1693206ecc..e40b3109c44 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -87,7 +87,10 @@ def check_settings(node_name, sleep_in_send_tables_status_ms, sleep_in_send_data def check_changing_replica_events(expected_count): result = NODES['node'].query("SELECT value FROM system.events WHERE event='HedgedRequestsChangeReplica'") - assert int(result) == expected_count + + # If server load is high we can see more than expected + # replica change events, but never less than expected + assert int(result) >= expected_count def update_configs(node_1_sleep_in_send_tables_status=0, node_1_sleep_in_send_data=0, diff --git a/tests/integration/test_hedged_requests_parallel/test.py b/tests/integration/test_hedged_requests_parallel/test.py index 33f70da00ca..7abc2eb1d2a 100644 --- a/tests/integration/test_hedged_requests_parallel/test.py +++ b/tests/integration/test_hedged_requests_parallel/test.py @@ -88,7 +88,10 @@ def check_settings(node_name, sleep_in_send_tables_status_ms, sleep_in_send_data def check_changing_replica_events(expected_count): result = NODES['node'].query("SELECT value FROM system.events WHERE event='HedgedRequestsChangeReplica'") - assert int(result) == expected_count + + # If server load is high we can see more than expected + # replica change events, but never less than expected + assert int(result) >= expected_count def update_configs(node_1_sleep_in_send_tables_status=0, node_1_sleep_in_send_data=0, From 190846c652a1e6919af34517c2ff6f6b9a79a86b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Apr 2021 14:53:21 +0300 Subject: [PATCH 123/152] Update version_date.tsv after release 21.2.8.31 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3e030c14bb0..28350298c82 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -2,6 +2,7 @@ v21.3.5.42-lts 2021-04-07 v21.3.4.25-lts 2021-03-28 v21.3.3.14-lts 2021-03-19 v21.3.2.5-lts 2021-03-12 +v21.2.8.31-stable 2021-04-07 v21.2.7.11-stable 2021-03-28 v21.2.6.1-stable 2021-03-15 v21.2.5.5-stable 2021-03-02 From ee13dbc1e8e1a815ae459b2d99e168c648370438 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Apr 2021 15:41:04 +0300 Subject: [PATCH 124/152] Update version_date.tsv after release 21.1.8.30 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 28350298c82..06fd86ad7f5 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -9,6 +9,7 @@ v21.2.5.5-stable 2021-03-02 
v21.2.4.6-stable 2021-02-20 v21.2.3.15-stable 2021-02-14 v21.2.2.8-stable 2021-02-07 +v21.1.8.30-stable 2021-04-07 v21.1.7.1-stable 2021-03-15 v21.1.6.13-stable 2021-03-02 v21.1.5.4-stable 2021-02-20 From 97b9f0221b783cad1b4a8da52dfa509bb095fb21 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 7 Apr 2021 16:00:25 +0300 Subject: [PATCH 125/152] delete perft test --- .../parse_engine_file_column_oriented.xml | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 tests/performance/parse_engine_file_column_oriented.xml diff --git a/tests/performance/parse_engine_file_column_oriented.xml b/tests/performance/parse_engine_file_column_oriented.xml deleted file mode 100644 index d3f7faa0c1a..00000000000 --- a/tests/performance/parse_engine_file_column_oriented.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - test.hits - - - - - format - - Parquet - Arrow - ORC - - - - - - -CREATE TABLE IF NOT EXISTS table_{format} ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = File({format}) - -INSERT INTO table_{format} SELECT WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate, 
CounterID, ClientIP, ClientIP6, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, URLDomain, RefererDomain, Refresh, IsRobot, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, UTCEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, RemoteIP6, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, RedirectTiming, DOMInteractiveTiming, DOMContentLoadedTiming, DOMCompleteTiming, LoadEventStartTiming, LoadEventEndTiming, NSToDOMContentLoadedTiming, FirstPaintTiming, RedirectCount, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID, YCLID, ShareService, ShareURL, ShareTitle, IslandID, RequestNum, RequestTry FROM test.hits LIMIT 100000 - -SELECT ClientEventTime, MobilePhoneModel, ClientIP6, Sex, SearchEngineID, SocialNetwork FROM table_{format} FORMAT Null - -DROP TABLE IF EXISTS table_{format} - - From fecd5f34350614f452c3c99c6e2dad20bd1979b2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 7 Apr 2021 16:16:19 +0300 Subject: [PATCH 126/152] more debug messages --- src/Interpreters/SystemLog.h | 60 +++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index aa3dc113e44..e1d7e4d2a79 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -184,12 +184,13 @@ private: // synchronous log flushing for SYSTEM FLUSH LOGS. uint64_t queue_front_index = 0; bool is_shutdown = false; + // A flag that says we must create the tables even if the queue is empty. bool is_force_prepare_tables = false; std::condition_variable flush_event; // Requested to flush logs up to this index, exclusive - uint64_t requested_flush_before = 0; + uint64_t requested_flush_up_to = 0; // Flushed log up to this index, exclusive - uint64_t flushed_before = 0; + uint64_t flushed_up_to = 0; // Logged overflow message at this queue front index uint64_t logged_queue_full_at_index = -1; @@ -267,8 +268,8 @@ void SystemLog::add(const LogElement & element) // It is enough to only wake the flushing thread once, after the message // count increases past half available size. 
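The renamed counters form a small offset handshake: writers publish requested_flush_up_to, the flushing thread publishes flushed_up_to, and both sides meet on one condition variable. A condensed sketch of that protocol, with the real code's 60-second wait_for timeout and is_force_prepare_tables flag omitted for brevity:

#include <algorithm>
#include <condition_variable>
#include <cstdint>
#include <iostream>
#include <mutex>

class FlushProtocol
{
public:
    /// Writer side: publish the offset we need flushed, then wait for it.
    void flushUpTo(uint64_t my_offset)
    {
        std::unique_lock<std::mutex> lock(mutex);
        requested_flush_up_to = std::max(requested_flush_up_to, my_offset);
        flush_event.notify_all();   /// wake the flushing thread
        flush_event.wait(lock, [&] { return flushed_up_to >= my_offset; });
    }

    /// Flushing-thread side: record durable progress and release waiters.
    void markFlushed(uint64_t end)
    {
        std::lock_guard<std::mutex> lock(mutex);
        flushed_up_to = end;
        flush_event.notify_all();
    }

private:
    std::mutex mutex;
    std::condition_variable flush_event;
    uint64_t requested_flush_up_to = 0;   /// highest offset any writer asked for
    uint64_t flushed_up_to = 0;           /// offset proven written out
};

int main()
{
    FlushProtocol protocol;
    protocol.markFlushed(10);   /// pretend the background thread already wrote [0, 10)
    protocol.flushUpTo(5);      /// returns immediately: 10 >= 5
    std::cout << "flush acknowledged\n";
}

Taking the maximum on the writer side keeps concurrent flush requests from shrinking one another, which is what the "taking care not to overwrite the requests made by other threads" comment in the patch is about.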
const uint64_t queue_end = queue_front_index + queue.size(); - if (requested_flush_before < queue_end) - requested_flush_before = queue_end; + if (requested_flush_up_to < queue_end) + requested_flush_up_to = queue_end; flush_event.notify_all(); } @@ -304,24 +305,34 @@ void SystemLog::add(const LogElement & element) template void SystemLog::flush(bool force) { - std::unique_lock lock(mutex); + uint64_t this_thread_requested_offset; - if (is_shutdown) - return; - - const uint64_t queue_end = queue_front_index + queue.size(); - - is_force_prepare_tables = force; - if (requested_flush_before < queue_end || force) { - requested_flush_before = queue_end; + std::unique_lock lock(mutex); + + if (is_shutdown) + return; + + this_thread_requested_offset = queue_front_index + queue.size(); + + // Publish our flush request, taking care not to overwrite the requests + // made by other threads. + is_force_prepare_tables |= force; + requested_flush_up_to = std::max(requested_flush_up_to, + this_thread_requested_offset); + flush_event.notify_all(); } + LOG_DEBUG(log, "Requested flush up to offset {}", + this_thread_requested_offset); + // Use an arbitrary timeout to avoid endless waiting. const int timeout_seconds = 60; + std::unique_lock lock(mutex); bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), - [&] { return flushed_before >= queue_end && !is_force_prepare_tables; }); + [&] { return flushed_up_to >= this_thread_requested_offset + && !is_force_prepare_tables; }); if (!result) { @@ -371,6 +382,8 @@ void SystemLog::savingThreadFunction() // The end index (exclusive, like std end()) of the messages we are // going to flush. uint64_t to_flush_end = 0; + // Should we prepare table even if there are no new messages. + bool should_prepare_tables_anyway = false; { std::unique_lock lock(mutex); @@ -378,7 +391,7 @@ void SystemLog::savingThreadFunction() std::chrono::milliseconds(flush_interval_milliseconds), [&] () { - return requested_flush_before > flushed_before || is_shutdown || is_force_prepare_tables; + return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; } ); @@ -389,18 +402,14 @@ void SystemLog::savingThreadFunction() to_flush.resize(0); queue.swap(to_flush); + should_prepare_tables_anyway = is_force_prepare_tables; + exit_this_thread = is_shutdown; } if (to_flush.empty()) { - bool force; - { - std::lock_guard lock(mutex); - force = is_force_prepare_tables; - } - - if (force) + if (should_prepare_tables_anyway) { prepareTable(); LOG_TRACE(log, "Table created (force)"); @@ -429,7 +438,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, { try { - LOG_TRACE(log, "Flushing system log, {} entries to flush", to_flush.size()); + LOG_TRACE(log, "Flushing system log, {} entries to flush up to offset {}", + to_flush.size(), to_flush_end); /// We check for existence of the table and create it as needed at every /// flush. 
This is done to allow user to drop the table at any moment @@ -468,12 +478,12 @@ void SystemLog::flushImpl(const std::vector & to_flush, { std::lock_guard lock(mutex); - flushed_before = to_flush_end; + flushed_up_to = to_flush_end; is_force_prepare_tables = false; flush_event.notify_all(); } - LOG_TRACE(log, "Flushed system log"); + LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end); } From aa47eb7d5e6b15a7963eae32bf39f316f064327d Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 7 Apr 2021 16:22:38 +0300 Subject: [PATCH 127/152] Fix checkpoint position after putting it on buffer end (#22518) --- src/IO/PeekableReadBuffer.cpp | 2 +- src/Server/HTTP/ReadHeaders.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index 15fdd9448ec..c7cef777afc 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -175,7 +175,7 @@ bool PeekableReadBuffer::nextImpl() if (checkpoint_at_end) { - checkpoint.emplace(working_buffer.begin()); + checkpoint.emplace(position()); peeked_size = 0; checkpoint_in_own_memory = false; } diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp index 77ec48c11b1..2fc2de8321a 100644 --- a/src/Server/HTTP/ReadHeaders.cpp +++ b/src/Server/HTTP/ReadHeaders.cpp @@ -51,7 +51,7 @@ void readHeaders( if (name.size() > max_name_length) throw Poco::Net::MessageException("Field name is too long"); if (ch != ':') - throw Poco::Net::MessageException("Field name is invalid or no colon found"); + throw Poco::Net::MessageException(fmt::format("Field name is invalid or no colon found: \"{}\"", name)); } in.ignore(); From 2987bbc9487f4d57fb75213480cd7aae5eb39133 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 16:52:11 +0300 Subject: [PATCH 128/152] Small improvements --- programs/server/Server.cpp | 2 +- src/Interpreters/Context.cpp | 36 +------ src/Interpreters/Context.h | 8 +- src/Interpreters/InterserverCredentials.cpp | 67 +++++++----- src/Interpreters/InterserverCredentials.h | 102 +++++++----------- src/Server/InterserverIOHTTPHandler.cpp | 4 +- src/Server/InterserverIOHTTPHandler.h | 2 - src/Storages/StorageReplicatedMergeTree.cpp | 20 ++-- .../test_replication_credentials/test.py | 35 +++++- 9 files changed, 132 insertions(+), 144 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 14af0ab1e29..61e440bcfe1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -689,7 +689,7 @@ int Server::main(const std::vector & /*args*/) } } - LOG_DEBUG(log, "Initiaializing InterserverCredentials."); + LOG_DEBUG(log, "Initiailizing interserver credentials."); global_context->updateInterserverCredentials(config()); if (config().has("macros")) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 82802b5d9fb..a914b62e99b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -324,8 +324,7 @@ struct ContextShared String interserver_io_host; /// The host name by which this server is available for other servers. UInt16 interserver_io_port = 0; /// and port. String interserver_scheme; /// http or https - mutable std::mutex interserver_io_credentials_mutex; - std::shared_ptr interserver_io_credentials; + MultiVersion interserver_io_credentials; String path; /// Path to the data directory, with a slash at the end. String flags_path; /// Path to the directory with some control flags for server maintenance. 
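MultiVersion gives readers a cheap immutable snapshot while a config reload publishes a whole new object instead of mutating fields in place, which is what lets credential rotation happen under running replication traffic. A simplified stand-in; MultiVersionLike is invented for the example and is not ClickHouse's actual MultiVersion template:

#include <iostream>
#include <memory>
#include <mutex>
#include <string>

template <typename T>
class MultiVersionLike
{
public:
    /// Readers get a snapshot that stays valid even if a reload happens later.
    std::shared_ptr<const T> get() const
    {
        std::lock_guard<std::mutex> lock(mutex);
        return current;
    }

    /// A reload builds a complete new object and swaps it in atomically.
    void set(std::unique_ptr<const T> value)
    {
        std::shared_ptr<const T> fresh(std::move(value));
        std::lock_guard<std::mutex> lock(mutex);
        current = std::move(fresh);
    }

private:
    mutable std::mutex mutex;
    std::shared_ptr<const T> current;
};

struct Credentials
{
    std::string user;
    std::string password;
};

int main()
{
    MultiVersionLike<Credentials> credentials;
    credentials.set(std::make_unique<Credentials>(Credentials{"admin", "222"}));

    auto snapshot = credentials.get();   /// an in-flight fetch keeps this alive...
    credentials.set(std::make_unique<Credentials>(Credentials{"admin", "333"}));   /// ...across a rotation

    std::cout << snapshot->password << " is still usable for the request in flight\n";
}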
@@ -1735,40 +1734,15 @@ bool Context::hasAuxiliaryZooKeeper(const String & name) const return getConfigRef().has("auxiliary_zookeepers." + name); } -std::shared_ptr Context::getInterserverCredential() +InterserverCredentialsPtr Context::getInterserverCredentials() { - std::lock_guard lock(shared->interserver_io_credentials_mutex); - return shared->interserver_io_credentials; -} - -void Context::setInterserverCredentials(std::shared_ptr credentials) -{ - std::lock_guard lock(shared->interserver_io_credentials_mutex); - shared->interserver_io_credentials = credentials; + return shared->interserver_io_credentials.get(); } void Context::updateInterserverCredentials(const Poco::Util::AbstractConfiguration & config) { - std::shared_ptr interserver_credentials = nullptr; - - if (config.has("interserver_http_credentials")) - { - interserver_credentials = ConfigInterserverCredentials::make(config, "interserver_http_credentials"); - } - else - { - interserver_credentials = NullInterserverCredentials::make(); - } - - global_context->setInterserverCredentials(interserver_credentials); -} - -std::pair Context::getInterserverCredentials() const -{ - std::lock_guard lock(shared->interserver_io_credentials_mutex); - auto & credentials = shared->interserver_io_credentials; - - return { credentials->getUser(), credentials->getPassword() }; + auto credentials = InterserverCredentials::make(config, "interserver_http_credentials"); + shared->interserver_io_credentials.set(std::move(credentials)); } void Context::setInterserverIOAddress(const String & host, UInt16 port) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index ce0e72082a4..8cc3b2f22d2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -61,7 +61,8 @@ class AccessRightsElements; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalModelsLoader; -class BaseInterserverCredentials; +class InterserverCredentials; +using InterserverCredentialsPtr = std::shared_ptr; class InterserverIOHandler; class BackgroundSchedulePool; class MergeList; @@ -524,8 +525,7 @@ public: /// Credentials which server will use to communicate with others void updateInterserverCredentials(const Poco::Util::AbstractConfiguration & config); - std::shared_ptr getInterserverCredential(); - std::pair getInterserverCredentials() const; + InterserverCredentialsPtr getInterserverCredentials(); /// Interserver requests scheme (http or https) void setInterserverScheme(const String & scheme); @@ -793,8 +793,6 @@ private: /// If the password is not set, the password will not be checked void setUserImpl(const String & name, const std::optional & password, const Poco::Net::SocketAddress & address); - - void setInterserverCredentials(std::shared_ptr credentials); }; diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index f1c4b1923c6..bab47c32d8c 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -1,59 +1,76 @@ #include #include +#include namespace DB { + namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; } -std::shared_ptr -ConfigInterserverCredentials::make(const Poco::Util::AbstractConfiguration & config, const std::string root_tag) +std::unique_ptr +InterserverCredentials::make(const Poco::Util::AbstractConfiguration & config, const std::string & root_tag) { - const auto user = config.getString(root_tag + ".user", ""); - const auto password = config.getString(root_tag + ".password", ""); + if 
(config.has("user") && !config.has("password")) + throw Exception("Configuration parameter interserver_http_credentials.password can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG); - if (user.empty()) - throw Exception("Configuration parameter interserver_http_credentials user can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + if (!config.has("user") && config.has("password")) + throw Exception("Configuration parameter interserver_http_credentials.user can't be empty if user specified", ErrorCodes::NO_ELEMENTS_IN_CONFIG); - auto store = makeCredentialStore(user, password, config, root_tag); + /// They both can be empty + auto user = config.getString(root_tag + ".user", ""); + auto password = config.getString(root_tag + ".password", ""); - return std::make_shared(user, password, store); + auto store = parseCredentialsFromConfig(user, password, config, root_tag); + + return std::make_unique(user, password, store); } -ConfigInterserverCredentials::Store ConfigInterserverCredentials::makeCredentialStore( - const std::string current_user_, - const std::string current_password_, +InterserverCredentials::CurrentCredentials InterserverCredentials::parseCredentialsFromConfig( + const std::string & current_user_, + const std::string & current_password_, const Poco::Util::AbstractConfiguration & config, - const std::string root_tag) + const std::string & root_tag) { - Store store; - store.insert({{current_user_, current_password_}, true}); - if (config.has(root_tag + ".allow_empty") && config.getBool(root_tag + ".allow_empty")) + auto * log = &Poco::Logger::get("InterserverCredentials"); + CurrentCredentials store; + store.emplace_back(current_user_, current_password_); + if (config.getBool(root_tag + ".allow_empty", false)) { + LOG_DEBUG(log, "Allowing empty credentials"); /// Allow empty credential to support migrating from no auth - store.insert({{"", ""}, true}); + store.emplace_back("", ""); } + Poco::Util::AbstractConfiguration::Keys old_users; + config.keys(root_tag, old_users); - Poco::Util::AbstractConfiguration::Keys users; - config.keys(root_tag + ".users", users); - for (const auto & user : users) + for (const auto & user_key : old_users) { - LOG_DEBUG(&Poco::Logger::get("InterserverCredentials"), "Adding credential for {}", user); - const auto password = config.getString(root_tag + ".users." + user); - store.insert({{user, password}, true}); + if (startsWith(user_key, "old")) + { + std::string full_prefix = root_tag + "." 
+ user_key; + std::string old_user_name = config.getString(full_prefix + ".user"); + LOG_DEBUG(log, "Adding credentials for old user {}", old_user_name); + + std::string old_user_password = config.getString(full_prefix + ".password"); + + store.emplace_back(old_user_name, old_user_password); + } } return store; } -std::pair ConfigInterserverCredentials::isValidUser(const std::pair credentials) +InterserverCredentials::CheckResult InterserverCredentials::isValidUser(const UserWithPassword & credentials) const { - const auto & valid = store.find(credentials); - if (valid == store.end()) + auto itr = std::find(all_users_store.begin(), all_users_store.end(), credentials); + + if (itr == all_users_store.end()) return {"Incorrect user or password in HTTP basic authentication: " + credentials.first, false}; + return {"", true}; } diff --git a/src/Interpreters/InterserverCredentials.h b/src/Interpreters/InterserverCredentials.h index e51ea12df25..6a7cec532e0 100644 --- a/src/Interpreters/InterserverCredentials.h +++ b/src/Interpreters/InterserverCredentials.h @@ -3,92 +3,68 @@ #include #include #include +#include namespace DB { -/// InterserverCredentials holds credentials for server (store) and client -/// credentials (current_*). The container is constructed through `make` and a -/// shared_ptr is captured inside Context. -class BaseInterserverCredentials -{ -public: - BaseInterserverCredentials(std::string current_user_, std::string current_password_) - : current_user(current_user_), current_password(current_password_) - { } - virtual ~BaseInterserverCredentials() { } - - /// isValidUser returns true or throws WRONG_PASSWORD - virtual std::pair isValidUser(const std::pair credentials) = 0; - - std::string getUser() { return current_user; } - - std::string getPassword() { return current_password; } - - -protected: - std::string current_user; - std::string current_password; -}; - - -/// NullInterserverCredentials are used when authentication is not configured -class NullInterserverCredentials : public BaseInterserverCredentials -{ -public: - NullInterserverCredentials(const NullInterserverCredentials &) = delete; - NullInterserverCredentials() : BaseInterserverCredentials("", "") { } - - ~NullInterserverCredentials() override { } - - static std::shared_ptr make() { return std::make_shared(); } - - std::pair isValidUser(const std::pair credentials) override - { - std::ignore = credentials; - return {"", true}; - } -}; - - -/// ConfigInterserverCredentials implements authentication using a Store, which +/// InterserverCredentials implements authentication using a CurrentCredentials, which /// is configured, e.g. 
/// /// admin /// 222 /// /// -/// +/// /// -/// 111 -/// +/// admin +/// qqq +/// +/// +/// +/// johny +/// 333 +/// /// -class ConfigInterserverCredentials : public BaseInterserverCredentials +class InterserverCredentials { public: - using Store = std::map, bool>; + using UserWithPassword = std::pair; + using CheckResult = std::pair; + using CurrentCredentials = std::vector; - ConfigInterserverCredentials(const ConfigInterserverCredentials &) = delete; + InterserverCredentials(const InterserverCredentials &) = delete; - static std::shared_ptr make(const Poco::Util::AbstractConfiguration & config, const std::string root_tag); + static std::unique_ptr make(const Poco::Util::AbstractConfiguration & config, const std::string & root_tag); - ~ConfigInterserverCredentials() override { } + InterserverCredentials(const std::string & current_user_, const std::string & current_password_, const CurrentCredentials & all_users_store_) + : current_user(current_user_) + , current_password(current_password_) + , all_users_store(all_users_store_) + {} - ConfigInterserverCredentials(const std::string current_user_, const std::string current_password_, const Store & store_) - : BaseInterserverCredentials(current_user_, current_password_), store(std::move(store_)) - { - } + CheckResult isValidUser(const UserWithPassword & credentials) const; + + std::string getUser() const { return current_user; } + + std::string getPassword() const { return current_password; } - std::pair isValidUser(const std::pair credentials) override; private: - Store store; + std::string current_user; + std::string current_password; - static Store makeCredentialStore( - const std::string current_user_, - const std::string current_password_, + /// In common situation this store contains one record + CurrentCredentials all_users_store; + + + static CurrentCredentials parseCredentialsFromConfig( + const std::string & current_user_, + const std::string & current_password_, const Poco::Util::AbstractConfiguration & config, - const std::string root_tag); + const std::string & root_tag); }; +using InterserverCredentialsPtr = std::shared_ptr; + } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index a62a2db0a10..72e06860839 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -26,11 +26,11 @@ namespace ErrorCodes std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServerRequest & request) const { - auto server_credentials = server.context().getInterserverCredential(); + auto server_credentials = server.context().getInterserverCredentials(); if (server_credentials) { if (!request.hasCredentials()) - return server_credentials->isValidUser(std::make_pair(default_user, default_password)); + return server_credentials->isValidUser(std::make_pair("", "")); String scheme, info; request.getCredentials(scheme, info); diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index b85343f2abf..c0d776115e1 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -46,8 +46,6 @@ private: void processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output); std::pair checkAuthentication(HTTPServerRequest & request) const; - const std::string default_user; - const std::string default_password; }; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 73f0a7907e5..f75070c247d 100644 --- 
a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -2401,7 +2402,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) ReplicatedMergeTreeAddress address(getZooKeeper()->get(source_replica_path + "/host")); auto timeouts = getFetchPartHTTPTimeouts(global_context); - auto [user, password] = global_context.getInterserverCredentials(); + auto credentials = global_context.getInterserverCredentials(); String interserver_scheme = global_context.getInterserverScheme(); if (interserver_scheme != address.scheme) @@ -2409,7 +2410,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) part_desc->res_part = fetcher.fetchPart( metadata_snapshot, part_desc->found_new_part_name, source_replica_path, - address.host, address.replication_port, timeouts, user, password, interserver_scheme, false, TMP_PREFIX + "fetch_"); + address.host, address.replication_port, timeouts, credentials->getUser(), credentials->getPassword(), interserver_scheme, false, TMP_PREFIX + "fetch_"); /// TODO: check columns_version of fetched part @@ -3755,7 +3756,6 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora ReplicatedMergeTreeAddress address; ConnectionTimeouts timeouts; - std::pair user_password; String interserver_scheme; std::optional tagger_ptr; std::function get_part; @@ -3772,10 +3772,10 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora address.fromString(zookeeper->get(source_replica_path + "/host")); timeouts = getFetchPartHTTPTimeouts(global_context); - user_password = global_context.getInterserverCredentials(); + auto credentials = global_context.getInterserverCredentials(); interserver_scheme = global_context.getInterserverScheme(); - get_part = [&, address, timeouts, user_password, interserver_scheme]() + get_part = [&, address, timeouts, credentials, interserver_scheme]() { if (interserver_scheme != address.scheme) throw Exception("Interserver schemes are different: '" + interserver_scheme @@ -3789,8 +3789,8 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora address.host, address.replication_port, timeouts, - user_password.first, - user_password.second, + credentials->getUser(), + credentials->getPassword(), interserver_scheme, to_detached, "", @@ -3928,10 +3928,10 @@ bool StorageReplicatedMergeTree::fetchExistsPart(const String & part_name, const ReplicatedMergeTreeAddress address(zookeeper->get(source_replica_path + "/host")); auto timeouts = ConnectionTimeouts::getHTTPTimeouts(global_context); - auto user_password = global_context.getInterserverCredentials(); + auto credentials = global_context.getInterserverCredentials(); String interserver_scheme = global_context.getInterserverScheme(); - get_part = [&, address, timeouts, user_password, interserver_scheme]() + get_part = [&, address, timeouts, interserver_scheme, credentials]() { if (interserver_scheme != address.scheme) throw Exception("Interserver schemes are different: '" + interserver_scheme @@ -3941,7 +3941,7 @@ bool StorageReplicatedMergeTree::fetchExistsPart(const String & part_name, const return fetcher.fetchPart( metadata_snapshot, part_name, source_replica_path, address.host, address.replication_port, - timeouts, user_password.first, user_password.second, interserver_scheme, false, "", nullptr, true, + timeouts, credentials->getUser(), credentials->getPassword(), 
interserver_scheme, false, "", nullptr, true, replaced_disk); }; diff --git a/tests/integration/test_replication_credentials/test.py b/tests/integration/test_replication_credentials/test.py index 82c1bcad7b0..9181c515adf 100644 --- a/tests/integration/test_replication_credentials/test.py +++ b/tests/integration/test_replication_credentials/test.py @@ -113,6 +113,32 @@ def test_different_credentials(different_credentials_cluster): assert node5.query("SELECT id FROM test_table order by id") == '111\n' assert node6.query("SELECT id FROM test_table order by id") == '222\n' + add_old = """ + + 9009 + + admin + 222 + + root + 111 + + + aaa + 333 + + + + """ + + node5.replace_config("/etc/clickhouse-server/config.d/credentials1.xml", add_old) + + node5.query("SYSTEM RELOAD CONFIG") + node5.query("INSERT INTO test_table values('2017-06-21', 333, 1)") + node6.query("SYSTEM SYNC REPLICA test_table", timeout=10) + + assert node6.query("SELECT id FROM test_table order by id") == '111\n222\n333\n' + node7 = cluster.add_instance('node7', main_configs=['configs/remote_servers.xml', 'configs/credentials1.xml'], with_zookeeper=True) @@ -134,7 +160,6 @@ def credentials_and_no_credentials_cluster(): def test_credentials_and_no_credentials(credentials_and_no_credentials_cluster): - # Initial state: node7 requires auth; node8 open node7.query("insert into test_table values ('2017-06-21', 111, 0)") time.sleep(1) @@ -144,7 +169,7 @@ def test_credentials_and_no_credentials(credentials_and_no_credentials_cluster): node8.query("insert into test_table values ('2017-06-22', 222, 1)") time.sleep(1) - assert node7.query("SELECT id FROM test_table order by id") == '111\n222\n' + assert node7.query("SELECT id FROM test_table order by id") == '111\n' assert node8.query("SELECT id FROM test_table order by id") == '222\n' allow_empty = """ @@ -161,8 +186,8 @@ def test_credentials_and_no_credentials(credentials_and_no_credentials_cluster): # change state: Flip node7 to mixed auth/non-auth (allow node8) node7.replace_config("/etc/clickhouse-server/config.d/credentials1.xml", allow_empty) + + node7.query("SYSTEM RELOAD CONFIG") node7.query("insert into test_table values ('2017-06-22', 333, 1)") - node8.query("DETACH TABLE test_table") - node8.query("ATTACH TABLE test_table") - time.sleep(3) + node8.query("SYSTEM SYNC REPLICA test_table", timeout=10) assert node8.query("SELECT id FROM test_table order by id") == '111\n222\n333\n' From 2a6560b1c6bdb0288107bd42e4063c08c3a22b53 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 16:59:18 +0300 Subject: [PATCH 129/152] Better interface --- src/Interpreters/InterserverCredentials.cpp | 5 +++++ src/Interpreters/InterserverCredentials.h | 2 +- src/Server/InterserverIOHTTPHandler.cpp | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index bab47c32d8c..e241a06b5a1 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -74,4 +74,9 @@ InterserverCredentials::CheckResult InterserverCredentials::isValidUser(const Us return {"", true}; } +InterserverCredentials::CheckResult InterserverCredentials::isValidUser(const std::string & user, const std::string & password) const +{ + return isValidUser(std::make_pair(user, password)); +} + } diff --git a/src/Interpreters/InterserverCredentials.h b/src/Interpreters/InterserverCredentials.h index 6a7cec532e0..ffe568868dd 100644 --- a/src/Interpreters/InterserverCredentials.h +++ 
b/src/Interpreters/InterserverCredentials.h @@ -44,6 +44,7 @@ public: {} CheckResult isValidUser(const UserWithPassword & credentials) const; + CheckResult isValidUser(const std::string & user, const std::string & password) const; std::string getUser() const { return current_user; } @@ -57,7 +58,6 @@ private: /// In common situation this store contains one record CurrentCredentials all_users_store; - static CurrentCredentials parseCredentialsFromConfig( const std::string & current_user_, const std::string & current_password_, diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 72e06860839..fac8ff1d36c 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -30,7 +30,7 @@ std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServer if (server_credentials) { if (!request.hasCredentials()) - return server_credentials->isValidUser(std::make_pair("", "")); + return server_credentials->isValidUser("", ""); String scheme, info; request.getCredentials(scheme, info); @@ -39,7 +39,7 @@ std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServer return {"Server requires HTTP Basic authentication but client provides another method", false}; Poco::Net::HTTPBasicCredentials credentials(info); - return server_credentials->isValidUser(std::make_pair(credentials.getUsername(), credentials.getPassword())); + return server_credentials->isValidUser(credentials.getUsername(), credentials.getPassword()); } else if (request.hasCredentials()) { From 98871f8c5a67438014af64e5f9b537eade4f19c2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 17:05:40 +0300 Subject: [PATCH 130/152] More clear messages --- src/Interpreters/InterserverCredentials.cpp | 5 +++++ src/Server/InterserverIOHTTPHandler.cpp | 10 ---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index e241a06b5a1..e60d397eb02 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -69,7 +69,12 @@ InterserverCredentials::CheckResult InterserverCredentials::isValidUser(const Us auto itr = std::find(all_users_store.begin(), all_users_store.end(), credentials); if (itr == all_users_store.end()) + { + if (credentials.first.empty()) + return {"Server requires HTTP Basic authentication, but client doesn't provide it", false}; + return {"Incorrect user or password in HTTP basic authentication: " + credentials.first, false}; + } return {"", true}; } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index fac8ff1d36c..98dfae7d0cb 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -21,7 +21,6 @@ namespace ErrorCodes { extern const int ABORTED; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; - extern const int WRONG_PASSWORD; } std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServerRequest & request) const @@ -127,15 +126,6 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe } catch (Exception & e) { - if (e.code() == ErrorCodes::WRONG_PASSWORD) - { - response.setStatusAndReason(HTTPServerResponse::HTTP_UNAUTHORIZED); - if (!response.sent()) - writeString("Unauthorized.", *used_output.out); - LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI()); - return; - } - if (e.code() == 
ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES) return; From a6d5c4329e3421108f02b9839cbf1a3776ec8883 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 7 Apr 2021 17:12:23 +0300 Subject: [PATCH 131/152] Support 'pr-must-backport' label --- utils/github/backport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/github/backport.py b/utils/github/backport.py index 7fddbbee241..d2687b893a4 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -76,7 +76,7 @@ class Backport: # First pass. Find all must-backports for label in pr['labels']['nodes']: - if label['name'] == 'pr-bugfix': + if label['name'] == 'pr-bugfix' or label['name'] == 'pr-must-backport': backport_map[pr['number']] = branch_set.copy() continue matched = RE_MUST_BACKPORT.match(label['name']) From c19482f4d2a06ec74db0b19f462ec19d5e0ec961 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 7 Apr 2021 17:17:21 +0300 Subject: [PATCH 132/152] Update contrib.md --- docs/en/development/contrib.md | 40 ++++++---------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 76a2f647231..cefa726f287 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -5,36 +5,10 @@ toc_title: Third-Party Libraries Used # Third-Party Libraries Used {#third-party-libraries-used} -| Library | License | -|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------| -| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | -| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | -| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | -| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) | -| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) | -| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | -| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | -| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | -| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) | -| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | -| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | -| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | -| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) | -| libmetrohash | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | -| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) | -| libressl | [OpenSSL 
License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) | -| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) | -| libwidechar_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) | -| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) | -| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) | -| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) | -| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | -| pdqsort | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/pdqsort/license.txt) | -| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) | -| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) | -| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) | -| sentry-native | [MIT License](https://github.com/getsentry/sentry-native/blob/master/LICENSE) | -| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) | -| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) | -| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) | +The list of third-party libraries can be obtained by the followind query: + +``` +SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en' +``` + +[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) From 8e35873f993951950977836e760a9758e7267603 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 7 Apr 2021 17:17:31 +0300 Subject: [PATCH 133/152] Update contrib.md --- docs/en/development/contrib.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index cefa726f287..12b66783307 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -5,7 +5,7 @@ toc_title: Third-Party Libraries Used # Third-Party Libraries Used {#third-party-libraries-used} -The list of third-party libraries can be obtained by the followind query: +The list of third-party libraries can be obtained by the following query: ``` SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en' From ca2171695f47551edcd9ee906c9a81843a011650 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 7 Apr 2021 17:20:18 +0300 Subject: [PATCH 134/152] Update contrib.md --- docs/en/development/contrib.md | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 12b66783307..64ca2387029 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -12,3 +12,80 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li ``` 
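For a quick overview rather than the full listing, the same table can be aggregated with ordinary SQL. A minimal sketch (it assumes a server new enough to ship the `system.licenses` table; the exact counts differ between versions):

```sql
-- Count the bundled third-party libraries per license family.
SELECT license_type, count() AS libraries
FROM system.licenses
GROUP BY license_type
ORDER BY libraries DESC
```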
[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) + +| library_name | license_type | license_path | +|:-|:-|:-| +| abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE | +| AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE | +| arrow | Apache | /contrib/arrow/LICENSE.txt | +| avro | Apache | /contrib/avro/LICENSE.txt | +| aws | Apache | /contrib/aws/LICENSE.txt | +| aws-c-common | Apache | /contrib/aws-c-common/LICENSE | +| aws-c-event-stream | Apache | /contrib/aws-c-event-stream/LICENSE | +| aws-checksums | Apache | /contrib/aws-checksums/LICENSE | +| base64 | BSD 2-clause | /contrib/base64/LICENSE | +| boost | Boost | /contrib/boost/LICENSE_1_0.txt | +| boringssl | BSD | /contrib/boringssl/LICENSE | +| brotli | MIT | /contrib/brotli/LICENSE | +| capnproto | MIT | /contrib/capnproto/LICENSE | +| cassandra | Apache | /contrib/cassandra/LICENSE.txt | +| cctz | Apache | /contrib/cctz/LICENSE.txt | +| cityhash102 | MIT | /contrib/cityhash102/COPYING | +| cppkafka | BSD 2-clause | /contrib/cppkafka/LICENSE | +| croaring | Apache | /contrib/croaring/LICENSE | +| curl | Apache | /contrib/curl/docs/LICENSE-MIXING.md | +| cyrus-sasl | BSD 2-clause | /contrib/cyrus-sasl/COPYING | +| double-conversion | BSD 3-clause | /contrib/double-conversion/LICENSE | +| dragonbox | Apache | /contrib/dragonbox/LICENSE-Apache2-LLVM | +| fast_float | Apache | /contrib/fast_float/LICENSE | +| fastops | MIT | /contrib/fastops/LICENSE | +| flatbuffers | Apache | /contrib/flatbuffers/LICENSE.txt | +| fmtlib | Unknown | /contrib/fmtlib/LICENSE.rst | +| gcem | Apache | /contrib/gcem/LICENSE | +| googletest | BSD 3-clause | /contrib/googletest/LICENSE | +| grpc | Apache | /contrib/grpc/LICENSE | +| h3 | Apache | /contrib/h3/LICENSE | +| hyperscan | Boost | /contrib/hyperscan/LICENSE | +| icu | Public Domain | /contrib/icu/icu4c/LICENSE | +| icudata | Public Domain | /contrib/icudata/LICENSE | +| jemalloc | BSD 2-clause | /contrib/jemalloc/COPYING | +| krb5 | MIT | /contrib/krb5/src/lib/gssapi/LICENSE | +| libc-headers | LGPL | /contrib/libc-headers/LICENSE | +| libcpuid | BSD 2-clause | /contrib/libcpuid/COPYING | +| libcxx | Apache | /contrib/libcxx/LICENSE.TXT | +| libcxxabi | Apache | /contrib/libcxxabi/LICENSE.TXT | +| libdivide | zLib | /contrib/libdivide/LICENSE.txt | +| libfarmhash | MIT | /contrib/libfarmhash/COPYING | +| libgsasl | LGPL | /contrib/libgsasl/LICENSE | +| libhdfs3 | Apache | /contrib/libhdfs3/LICENSE.txt | +| libmetrohash | Apache | /contrib/libmetrohash/LICENSE | +| libpq | Unknown | /contrib/libpq/COPYRIGHT | +| libpqxx | BSD 3-clause | /contrib/libpqxx/COPYING | +| librdkafka | MIT | /contrib/librdkafka/LICENSE.murmur2 | +| libunwind | Apache | /contrib/libunwind/LICENSE.TXT | +| libuv | BSD | /contrib/libuv/LICENSE | +| llvm | Apache | /contrib/llvm/llvm/LICENSE.TXT | +| lz4 | BSD | /contrib/lz4/LICENSE | +| mariadb-connector-c | LGPL | /contrib/mariadb-connector-c/COPYING.LIB | +| miniselect | Boost | /contrib/miniselect/LICENSE_1_0.txt | +| msgpack-c | Boost | /contrib/msgpack-c/LICENSE_1_0.txt | +| murmurhash | Public Domain | /contrib/murmurhash/LICENSE | +| NuRaft | Apache | /contrib/NuRaft/LICENSE | +| openldap | Unknown | /contrib/openldap/LICENSE | +| orc | Apache | /contrib/orc/LICENSE | +| poco | Boost | /contrib/poco/LICENSE | +| protobuf | BSD 3-clause | /contrib/protobuf/LICENSE | +| rapidjson | MIT | /contrib/rapidjson/bin/jsonschema/LICENSE 
| +| re2 | BSD 3-clause | /contrib/re2/LICENSE | +| replxx | BSD 3-clause | /contrib/replxx/LICENSE.md | +| rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb | +| sentry-native | MIT | /contrib/sentry-native/LICENSE | +| simdjson | Apache | /contrib/simdjson/LICENSE | +| snappy | Public Domain | /contrib/snappy/COPYING | +| sparsehash-c11 | BSD 3-clause | /contrib/sparsehash-c11/LICENSE | +| stats | Apache | /contrib/stats/LICENSE | +| thrift | Apache | /contrib/thrift/LICENSE | +| unixodbc | LGPL | /contrib/unixodbc/COPYING | +| xz | Public Domain | /contrib/xz/COPYING | +| zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md | +| zstd | BSD | /contrib/zstd/LICENSE | From 22f8a81ff912c1d813feec7b3c0856af160db7bc Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 7 Apr 2021 17:45:33 +0300 Subject: [PATCH 135/152] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af5c6aa4a33..4c4dd431f8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ * The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)). -* Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). +* Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). * Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). 
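To make the interval-alignment change mentioned in the `toStartOfIntervalFunction` bullet above concrete, here is a small sketch (it assumes a 21.4 server and the `UTC` timezone; older releases would return an epoch-aligned value instead):

```sql
-- 21.4 aligns 11-hour buckets to midnight, so a day splits into
-- 00:00:00..10:59:59, 11:00:00..21:59:59 and 22:00:00..23:59:59.
SELECT toStartOfInterval(toDateTime('2021-04-07 15:30:00', 'UTC'), INTERVAL 11 HOUR)
-- returns 2021-04-07 11:00:00
```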
#### New Feature From 5c91cc35732f3121b768d8ee31a717fa89693738 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 7 Apr 2021 17:46:46 +0300 Subject: [PATCH 136/152] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c4dd431f8e..3f0e30f2af6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,6 @@ * Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)). * Zero-copy replication for `ReplicatedMergeTree` over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)). * Extended range of `DateTime64` to properly support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([Vasily Nemkov](https://github.com/Enmk)). -* Support ThetaSketch to do set operations. [#22207](https://github.com/ClickHouse/ClickHouse/pull/22207) ([Ping Yu](https://github.com/pingyu)). #### Performance Improvement From e1e62bb58c7b3208276c8c2f8229050f0661fa25 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 7 Apr 2021 17:58:14 +0300 Subject: [PATCH 137/152] once again w/new sandbox settings From e2545ea5b8f21e83881b045d147106db3daa013f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 7 Apr 2021 18:02:37 +0300 Subject: [PATCH 138/152] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f0e30f2af6..814938b8e05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,9 @@ * The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)). * Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). -* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. 
[#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). +* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**: + * `ATTACH PART[ITION]` queries may not work during cluster upgrade. + * It's not possible to rollback to older ClickHouse version after executing `ALTER ... ATTACH` query in new version as the old servers would fail to pass the `ATTACH_PART` entry in the replicated log. #### New Feature From b2a370ad493f82f3940d46e2818c549c281363c4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 19:06:20 +0300 Subject: [PATCH 139/152] Fix PVS --- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f75070c247d..28ea95358d1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3757,6 +3757,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora ReplicatedMergeTreeAddress address; ConnectionTimeouts timeouts; String interserver_scheme; + InterserverCredentialsPtr credentials; std::optional tagger_ptr; std::function get_part; @@ -3772,7 +3773,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora address.fromString(zookeeper->get(source_replica_path + "/host")); timeouts = getFetchPartHTTPTimeouts(global_context); - auto credentials = global_context.getInterserverCredentials(); + credentials = global_context.getInterserverCredentials(); interserver_scheme = global_context.getInterserverScheme(); get_part = [&, address, timeouts, credentials, interserver_scheme]() From caff65f1c24d0a0b4b039435d287bd1d5c63bec9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Apr 2021 20:52:12 +0300 Subject: [PATCH 140/152] Functions ExternalDictionaries standardize exception throw --- src/Functions/FunctionsExternalDictionaries.h | 163 +++++++++++------- 1 file changed, 105 insertions(+), 58 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 1f8ef60b4af..dba0e509c50 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -97,15 +97,15 @@ public: return false; if (sample_columns.columns() < 3) - throw Exception{"Wrong arguments count", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong arguments count"); const auto * dict_name_col = checkAndGetColumnConst(sample_columns.getByPosition(0).column.get()); if (!dict_name_col) - throw Exception{"First argument of function dictGet... must be a constant string", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function dictGet must be a constant string"); const auto * attr_name_col = checkAndGetColumnConst(sample_columns.getByPosition(1).column.get()); if (!attr_name_col) - throw Exception{"Second argument of function dictGet... 
must be a constant string", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet... must be a constant string"); return getDictionary(dict_name_col->getValue())->isInjective(attr_name_col->getValue()); } @@ -154,16 +154,22 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2) - throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong argument count for function {}", + getName()); if (!isString(arguments[0])) - throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() - + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function, expected a string", + arguments[0]->getName(), + getName()); if (!WhichDataType(arguments[1]).isUInt64() && !isTuple(arguments[1])) - throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() - + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {} must be UInt64 or tuple(...)", + arguments[1]->getName(), + getName()); return std::make_shared(); } @@ -193,31 +199,39 @@ public: if (dictionary_key_type == DictionaryKeyType::range) { if (arguments.size() != 3) - throw Exception{"Wrong argument count for function " + getName() - + " when dictionary has key type range", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong argument count for function {} when dictionary has key type range", + getName()); range_col = arguments[2].column; range_col_type = arguments[2].type; if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64))) - throw Exception{"Illegal type " + range_col_type->getName() + " of fourth argument of function " - + getName() + " must be convertible to Int64.", - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of fourth argument of function {} must be convertible to Int64.", + range_col_type->getName(), + getName()); } if (dictionary_key_type == DictionaryKeyType::simple) { if (!key_column_type.isUInt64()) - throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(), - ErrorCodes::TYPE_MISMATCH}; + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be UInt64 when dictionary is simple. Actual type {}.", + getName(), + key_column_with_type.type->getName()); return dictionary->hasKeys({key_column}, {std::make_shared()}); } else if (dictionary_key_type == DictionaryKeyType::complex) { if (!key_column_type.isTuple()) - throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(), - ErrorCodes::TYPE_MISMATCH}; + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be tuple when dictionary is complex. Actual type {}.", + getName(), + key_column_with_type.type->getName()); /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. 
ColumnPtr key_column_full = key_column->convertToFullColumnIfConst(); @@ -228,7 +242,16 @@ public: return dictionary->hasKeys(key_columns, key_types); } else + { + if (!key_column_type.isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.", + getName(), + key_column_with_type.type->getName()); + return dictionary->hasKeys({key_column, range_col}, {std::make_shared(), range_col_type}); + } } private: @@ -273,14 +296,18 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() < 3) - throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong argument count for function {}", + getName()); String dictionary_name; if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) dictionary_name = name_col->getValue(); else - throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() - + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected a const string.", + arguments[0].type->getName(), + getName()); Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type); @@ -311,17 +338,20 @@ public: if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) dictionary_name = name_col->getValue(); else - throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() - + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected a const string.", + arguments[0].type->getName(), + getName()); Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type); auto dictionary = helper.getDictionary(dictionary_name); if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type)) - throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " - + getName() + ", must be UInt64 or tuple(...).", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, must be UInt64 or tuple(...).", + arguments[2].type->getName(), + getName()); auto dictionary_key_type = dictionary->getKeyType(); @@ -333,15 +363,20 @@ public: if (dictionary_key_type == DictionaryKeyType::range) { if (current_arguments_index >= arguments.size()) - throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {} should be {}", + getName(), + arguments.size(), + arguments.size() + 1); range_col = arguments[current_arguments_index].column; range_col_type = arguments[current_arguments_index].type; if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64))) - throw Exception{"Illegal type " + range_col_type->getName() + " of fourth argument of function " - + getName() + " must be convertible to Int64.", - ErrorCodes::ILLEGAL_COLUMN}; + 
throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of fourth argument of function must be convertible to Int64.", + range_col_type->getName(), + getName()); ++current_arguments_index; } @@ -351,7 +386,11 @@ public: if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault) { if (current_arguments_index >= arguments.size()) - throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {} should be {}", + getName(), + arguments.size(), + arguments.size() + 1); const auto & column_before_cast = arguments[current_arguments_index]; @@ -361,10 +400,14 @@ public: for (const auto & nested_type : nested_types) if (nested_type->isNullable()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Wrong argument for function ({}) default values column nullable is not supported", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Wrong argument for function {} default values column nullable is not supported", + getName()); } else if (column_before_cast.type->isNullable()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Wrong argument for function ({}) default values column nullable is not supported", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Wrong argument for function {} default values column nullable is not supported", + getName()); auto result_type_no_nullable = removeNullable(result_type); @@ -378,11 +421,12 @@ public: if (!tuple_column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Wrong argument for function ({}) default values column must be tuple", getName()); + "Wrong argument for function {} default values column must be tuple", + getName()); if (tuple_column->tupleSize() != attribute_names.size()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Wrong argument for function ({}) default values tuple column must contain same column size as requested attributes", + "Wrong argument for function {} default values tuple column must contain same column size as requested attributes", getName()); default_cols = tuple_column->getColumnsCopy(); @@ -406,7 +450,7 @@ public: if (!WhichDataType(key_col_with_type.type).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function ({}) must be uint64 when dictionary is simple. Actual type ({}).", + "Third argument of function {} must be UInt64 when dictionary is simple. Actual type {}.", getName(), key_col_with_type.type->getName()); @@ -436,7 +480,7 @@ public: if (!isTuple(key_col_with_type.type)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function ({}) must be tuple when dictionary is complex. Actual type ({}).", + "Third argument of function {} must be tuple when dictionary is complex. Actual type {}.", getName(), key_col_with_type.type->getName()); @@ -446,7 +490,7 @@ public: if (!isTuple(key_col_with_type.type)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function ({}) must be tuple when dictionary is complex. Actual type ({}).", + "Third argument of function {} must be tuple when dictionary is complex. 
Actual type {}.", getName(), key_col_with_type.type->getName()); @@ -479,7 +523,7 @@ public: if (!WhichDataType(key_col_with_type.type).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function ({}) must be uint64 when dictionary is range. Actual type ({}).", + "Third argument of function {} must be UInt64 when dictionary is range. Actual type {}.", getName(), key_col_with_type.type->getName()); @@ -505,7 +549,7 @@ public: default_cols.front()); } else - throw Exception{"Unknown dictionary identifier type", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown dictionary identifier type"); return result; } @@ -525,7 +569,7 @@ private: if (tuple_size < 1) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Tuple second argument of function ({}) must contain multiple constant string columns"); + "Tuple second argument of function {} must contain multiple constant string columns"); for (size_t i = 0; i < tuple_col.tupleSize(); ++i) { @@ -535,7 +579,7 @@ private: if (!attribute_name_column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Tuple second argument of function ({}) must contain multiple constant string columns", + "Tuple second argument of function {} must contain multiple constant string columns", getName()); attribute_names.emplace_back(attribute_name_column->getDataAt(0)); @@ -543,7 +587,7 @@ private: } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type ({}) of second argument of function ({}), expected a const string or const tuple of const strings.", + "Illegal type {} of second argument of function {}, expected a const string or const tuple of const strings.", type->getName(), getName()); @@ -852,6 +896,7 @@ private: const FunctionDictGetNoType dictionary_get_func_impl; const FunctionDictHas dictionary_has_func_impl; }; + /// Functions to work with hierarchies. class FunctionDictGetHierarchy final : public IFunction @@ -879,13 +924,13 @@ private: { if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function ({}). Expected String. Actual type ({})", + "Illegal type of first argument of function {}. Expected String. Actual type {}", getName(), arguments[0]->getName()); if (!WhichDataType(arguments[1]).isUInt64()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function ({}). Expected UInt64. Actual type ({})", + "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", getName(), arguments[1]->getName()); @@ -903,7 +948,7 @@ private: if (!dictionary->hasHierarchy()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary ({}) does not support hierarchy", + "Dictionary {} does not support hierarchy", dictionary->getFullName()); ColumnPtr result = dictionary->getHierarchy(arguments[1].column, std::make_shared()); @@ -939,19 +984,19 @@ private: { if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function ({}). Expected String. Actual type ({})", + "Illegal type of first argument of function {}. Expected String. Actual type {}", getName(), arguments[0]->getName()); if (!WhichDataType(arguments[1]).isUInt64()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function ({}). Expected UInt64. Actual type ({})", + "Illegal type of second argument of function {}. Expected UInt64. 
Actual type {}", getName(), arguments[1]->getName()); if (!WhichDataType(arguments[2]).isUInt64()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function ({}). Expected UInt64. Actual type ({})", + "Illegal type of third argument of function {}. Expected UInt64. Actual type {}", getName(), arguments[2]->getName()); @@ -968,7 +1013,9 @@ private: auto dict = helper.getDictionary(arguments[0].column); if (!dict->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Dictionary ({}) does not support hierarchy", dict->getFullName()); + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Dictionary {} does not support hierarchy", + dict->getFullName()); ColumnPtr res = dict->isInHierarchy(arguments[1].column, arguments[2].column, std::make_shared()); @@ -1004,13 +1051,13 @@ private: { if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function ({}). Expected String. Actual type ({})", + "Illegal type of first argument of function {}. Expected String. Actual type {}", getName(), arguments[0]->getName()); if (!WhichDataType(arguments[1]).isUInt64()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function ({}). Expected UInt64. Actual type ({})", + "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", getName(), arguments[1]->getName()); @@ -1026,7 +1073,7 @@ private: if (!dictionary->hasHierarchy()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary ({}) does not support hierarchy", + "Dictionary {} does not support hierarchy", dictionary->getFullName()); ColumnPtr result = dictionary->getDescendants(arguments[1].column, std::make_shared(), 1); @@ -1067,27 +1114,27 @@ private: if (arguments_size < 2 || arguments_size > 3) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Illegal arguments size of function ({}). Expects 2 or 3 arguments size. Actual size ({})", + "Illegal arguments size of function {}. Expects 2 or 3 arguments size. Actual size {}", getName(), arguments_size); } if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function ({}). Expected const String. Actual type ({})", + "Illegal type of first argument of function {}. Expected const String. Actual type {}", getName(), arguments[0]->getName()); if (!WhichDataType(arguments[1]).isUInt64()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function ({}). Expected UInt64. Actual type ({})", + "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", getName(), arguments[1]->getName()); if (arguments.size() == 3 && !isUnsignedInteger(arguments[2])) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function ({}). Expected const unsigned integer. Actual type ({})", + "Illegal type of third argument of function {}. Expected const unsigned integer. Actual type {}", getName(), arguments[2]->getName()); } @@ -1108,7 +1155,7 @@ private: { if (!isColumnConst(*arguments[2].column)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function ({}). Expected const unsigned integer.", + "Illegal type of third argument of function {}. 
Expected const unsigned integer.", getName()); level = static_cast(arguments[2].column->get64(0)); @@ -1116,7 +1163,7 @@ private: if (!dictionary->hasHierarchy()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary ({}) does not support hierarchy", + "Dictionary {} does not support hierarchy", dictionary->getFullName()); ColumnPtr res = dictionary->getDescendants(arguments[1].column, std::make_shared(), level); From ccba5d0066df48086d6850e8bdde6fd7c851367d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Apr 2021 21:08:25 +0300 Subject: [PATCH 141/152] Fix bug (broken HTML) on the website --- website/templates/index/quickstart.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/quickstart.html b/website/templates/index/quickstart.html index 454fc68151d..0d967e7b96c 100644 --- a/website/templates/index/quickstart.html +++ b/website/templates/index/quickstart.html @@ -36,7 +36,7 @@ target="_blank"> official Docker images of ClickHouse, this is not the only option though. Alternatively, you can easily get a running ClickHouse instance or cluster at - + Yandex Managed Service for ClickHouse.
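The argument-type checks standardized in the `FunctionsExternalDictionaries.h` patch above correspond to how `dictGet`/`dictHas` are invoked for each dictionary layout. A rough sketch (the dictionary names are hypothetical; the key shapes follow the error messages in that patch):

```sql
-- Simple-key dictionary: the key must be UInt64.
SELECT dictGet('simple_dict', 'value', toUInt64(1));

-- Complex-key dictionary: the key must be a tuple.
SELECT dictGet('complex_dict', 'value', tuple('region', toUInt64(1)));

-- Range dictionary: a UInt64 key plus a range column convertible to Int64.
SELECT dictGet('range_dict', 'rate', toUInt64(1), toDate('2021-04-07'));
```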
From e8e894f912b319d5d2783e2c527105eebf4571c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Apr 2021 21:15:34 +0300 Subject: [PATCH 142/152] Add support button --- website/templates/index/hero.html | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/website/templates/index/hero.html b/website/templates/index/hero.html index 55d0111ac61..efa4643e841 100644 --- a/website/templates/index/hero.html +++ b/website/templates/index/hero.html @@ -22,12 +22,8 @@
Quick start From f7cc15fdcd6b1d4f45a2c6531323dc3ab02ed84f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Apr 2021 21:38:24 +0300 Subject: [PATCH 143/152] FileDictionarySource fix absolute file path --- src/Dictionaries/FileDictionarySource.cpp | 24 ++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index addc2adef02..bdc7ce66900 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -1,8 +1,11 @@ #include "FileDictionarySource.h" + +#include +#include + #include #include #include -#include #include #include #include "DictionarySourceFactory.h" @@ -10,7 +13,6 @@ #include "registerDictionaries.h" #include "DictionarySourceHelpers.h" - namespace DB { static const UInt64 max_block_size = 8192; @@ -32,9 +34,19 @@ FileDictionarySource::FileDictionarySource( { if (check_config) { - const String user_files_path = context.getUserFilesPath(); - if (!startsWith(filepath, user_files_path)) - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); + auto source_file_path = std::filesystem::path(filepath); + auto source_file_absolute_path = std::filesystem::canonical(source_file_path); + + String user_files_path_string_value = context.getUserFilesPath(); + auto user_files_path = std::filesystem::path(user_files_path_string_value); + auto user_files_absolute_path = std::filesystem::canonical(user_files_path); + + auto [_, user_files_absolute_path_mismatch_it] = std::mismatch(source_file_absolute_path.begin(), source_file_absolute_path.end(), user_files_absolute_path.begin(), user_files_absolute_path.end()); + + bool user_files_absolute_path_include_source_file_absolute_path = user_files_absolute_path_mismatch_it == user_files_absolute_path.end(); + + if (!user_files_absolute_path_include_source_file_absolute_path) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path_string_value); } } @@ -89,6 +101,8 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + std::cerr << "Check config " << check_config << std::endl; + return std::make_unique(filepath, format, sample_block, context_local_copy, check_config); }; From ff04292ac82d7f44aab2dbe6e723d856ec16f409 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Apr 2021 21:42:09 +0300 Subject: [PATCH 144/152] Update FileDictionarySource.cpp --- src/Dictionaries/FileDictionarySource.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index bdc7ce66900..0fe632802e7 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -101,8 +101,6 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); - std::cerr << "Check config " << check_config << std::endl; - return std::make_unique(filepath, format, sample_block, context_local_copy, check_config); }; From 58af8be028437ddcc670107c547933ef00c87031 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 7 Apr 2021 21:45:45 +0300 Subject: [PATCH 145/152] once again From eb1228bba0306aa0835226e8d0d28ae6fd1d27a0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Apr 2021 23:00:14 +0300 
Subject: [PATCH 146/152] Don't fail if directories already exists --- tests/config/install.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index 9c4f8caca07..7e01860e241 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -71,8 +71,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] # There is a bug in config reloading, so we cannot override macros using --macros.replica r2 # And we have to copy configs... - mkdir /etc/clickhouse-server1 - mkdir /etc/clickhouse-server2 + mkdir -p /etc/clickhouse-server1 + mkdir -p /etc/clickhouse-server2 chown clickhouse /etc/clickhouse-server1 chown clickhouse /etc/clickhouse-server2 chgrp clickhouse /etc/clickhouse-server1 @@ -84,8 +84,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo -u clickhouse cat /etc/clickhouse-server/config.d/macros.xml | sed "s|r1|r2|" > /etc/clickhouse-server1/config.d/macros.xml sudo -u clickhouse cat /etc/clickhouse-server/config.d/macros.xml | sed "s|s1|s2|" > /etc/clickhouse-server2/config.d/macros.xml - sudo mkdir /var/lib/clickhouse1 - sudo mkdir /var/lib/clickhouse2 + sudo mkdir -p /var/lib/clickhouse1 + sudo mkdir -p /var/lib/clickhouse2 sudo chown clickhouse /var/lib/clickhouse1 sudo chown clickhouse /var/lib/clickhouse2 sudo chgrp clickhouse /var/lib/clickhouse1 From 6acc92ffd9085c501b78fe5241a29d39023cfb1d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 7 Apr 2021 23:38:48 +0300 Subject: [PATCH 147/152] remove debug --- debian/rules | 2 +- docker/packager/binary/build.sh | 6 +++--- docker/packager/deb/build.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/debian/rules b/debian/rules index b30dc2ec71b..8eb47e95389 100755 --- a/debian/rules +++ b/debian/rules @@ -62,7 +62,7 @@ ifndef DISABLE_NINJA NINJA=$(shell which ninja) ifneq ($(NINJA),) CMAKE_FLAGS += -GNinja - export MAKE=$(NINJA) $(NINJA_FLAGS) --verbose + export MAKE=$(NINJA) $(NINJA_FLAGS) endif endif diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 54c400bf029..cf74105fbbb 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -13,8 +13,8 @@ tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 -- # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. -export CCACHE_LOGFILE=/build/ccache.log -export CCACHE_DEBUG=1 +# export CCACHE_LOGFILE=/build/ccache.log +# export CCACHE_DEBUG=1 mkdir -p build/build_docker cd build/build_docker @@ -28,7 +28,7 @@ ccache --show-stats ||: ccache --zero-stats ||: # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -ninja $NINJA_FLAGS --verbose clickhouse-bundle +ninja $NINJA_FLAGS clickhouse-bundle ccache --show-config ||: ccache --show-stats ||: diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index e4972edbab1..c1a0b27db5d 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -3,8 +3,8 @@ set -x -e # Uncomment to debug ccache. 
-export CCACHE_LOGFILE=/build/ccache.log -export CCACHE_DEBUG=1 +# export CCACHE_LOGFILE=/build/ccache.log +# export CCACHE_DEBUG=1 ccache --show-config ||: ccache --show-stats ||: From ac3de6626af6772297c2f5df653854b98635605d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 8 Apr 2021 10:51:04 +0300 Subject: [PATCH 148/152] Update date.md --- docs/en/sql-reference/data-types/date.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 886e93f433c..0cfac4d59fe 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -5,7 +5,7 @@ toc_title: Date # Date {#data_type-date} -A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2106, but the final fully-supported year is 2105). +A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2149, but the final fully-supported year is 2148). The date value is stored without the time zone. From 282ff3b245d6dfc39c6b97b0b7ff075843a92d71 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Apr 2021 12:20:10 +0300 Subject: [PATCH 149/152] Remastered changelog for 21.4 --- CHANGELOG.md | 95 +++++++++++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 814938b8e05..a8564e3acfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,97 +1,97 @@ ## ClickHouse release 21.4 -### ClickHouse release 21.4.1 2021-04-02 +### ClickHouse release 21.4.1 2021-04-08 #### Backward Incompatible Change * The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: `00:00:00..10:59:59`, `11:00:00..21:59:59` and `22:00:00..23:59:59`. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `Age` and `Precision` in graphite rollup configs should increase from retention to retention. Now it's checked and the wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)). * Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. 
Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). -* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**: +* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**: * `ATTACH PART[ITION]` queries may not work during cluster upgrade. * It's not possible to rollback to older ClickHouse version after executing `ALTER ... ATTACH` query in new version as the old servers would fail to pass the `ATTACH_PART` entry in the replicated log. #### New Feature -* Added function `dictGetOrNull`. It works like `dictGet`, but return `Null` in case key was not found in dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)). -* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` return all children as an array if indexes. It is a inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` return all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)). +* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). * Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. it is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)). -* Add function `timezoneOf` that returns the timezone name of `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. 
+* Added `executable_pool` dictionary source. Closes [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)). * Added table function `dictionary`. It works the same way as the `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)). * Support `Nullable` type for `PolygonDictionary` attribute. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)). * Functions `dictGet`, `dictHas` use current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)). -* Add `ctime` option to `zookeeper-dump-tree`. It allows to dump node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)). +* Added function `dictGetOrNull`. It works like `dictGet`, but returns `NULL` if the key is not found in the dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)). +* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for `Nullable` type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for multiple attributes fetch with `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)). +* Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)). +* Add function `timezoneOf` that returns the timezone name of `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add new optional clause `GRANTEES` for `CREATE/ALTER USER` commands. It specifies users or roles which are allowed to receive grants from this user, on the condition that this user also has all the required access granted with the grant option. By default `GRANTEES ANY` is used, which means a user with the grant option can grant to anyone. Syntax: `CREATE USER ... GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]`. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)).
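For example (a sketch with hypothetical user and role names):

```sql
-- user1 may pass on their grantable privileges only to user2 and user3:
CREATE USER user1 IDENTIFIED WITH sha256_password BY 'secret' GRANTEES user2, user3;

-- power_user may grant to anyone except probation_user:
ALTER USER power_user GRANTEES ANY EXCEPT probation_user;
```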
-* Add option `--backslash` for `clickhouse-format`, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasFL)). * Add new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. Also show actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)). * Add `_partition_id` virtual column for `MergeTree*` engines. Allow to prune partitions by `_partition_id`. Add `partitionID()` function to calculate partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)). * Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)). -* Added `ExecutablePool` dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)). * Added new SQL command `ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'`. This command is needed to properly remove 'frozen' partitions from all disks. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)). -* Added `Grant,` `Revoke` and `System` values of `query_kind` column for corresponding queries in `system.query_log`. [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)). -* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for `Nullable` type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for multiple attributes fetch with `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow customizing timeouts for http connections used for replication independently from other http timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)). * Supports implicit key type conversion for JOIN. [#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir](https://github.com/vdimir)). -* Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Experimental Feature + +* Support `RANGE OFFSET` frame (for window functions) for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame. They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)).
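A sketch of how the new functions respect the frame (window functions were still experimental in 21.4, hence the setting):

```sql
SELECT
    number,
    lagInFrame(number, 2) OVER (ORDER BY number
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS like_lag,
    lagInFrame(number, 2) OVER (ORDER BY number
        ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS respects_frame
FROM numbers(4)
SETTINGS allow_experimental_window_functions = 1;
-- like_lag:       0, 0, 0, 1 — behaves like lag(number, 2)
-- respects_frame: always 0 — the two-row frame never contains the row 2 back
```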
* Zero-copy replication for `ReplicatedMergeTree` over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)). -* Extended range of `DateTime64` to properly support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([Vasily Nemkov](https://github.com/Enmk)). +* Added possibility to migrate existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)). #### Performance Improvement +* Supported parallel formatting in `clickhouse-local` and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Enable read with mmap IO for file ranges from 64 MiB (the setting `min_bytes_to_use_mmap_io`). It may lead to moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add cache for files read with `min_bytes_to_use_mmap_io` setting. It makes significant (2x and more) performance improvement when the value of the setting is small, by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that make it less reliable in production (e.g. hangs or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. [#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Avoid unnecessary data copy when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is the default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible). The `NONE` codec is useful in some cases: when data is incompressible, or for synthetic benchmarks (see the sketch below). [#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move to `PREWHERE` columns which are in the sorting key. [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)). -* Supported parallel formatting in `clickhouse-local` and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Improved performance by replacing `memcpy` with another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
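A hedged sketch of the codec `NONE` case mentioned above (table and column names are hypothetical):

```sql
-- Reasonable only for incompressible payloads or synthetic benchmarks;
-- the default LZ4 remains the recommended choice.
CREATE TABLE bench_raw
(
    payload String CODEC(NONE)
)
ENGINE = MergeTree
ORDER BY tuple();
```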
+* Improve performance of aggregation in order of sorting key (with enabled setting `optimize_aggregation_in_order`). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)). #### Improvement +* Add connection pool for PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support non-default table schema for postgres storage/table-function. Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support replicas priority for postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)). +* Added `Grant`, `Revoke` and `System` values of `query_kind` column for corresponding queries in `system.query_log`. [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)). +* Allow customizing timeouts for HTTP connections used for replication independently from other HTTP timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)). * Better exception message in client in case of exception while server is writing blocks. In previous versions the client may get a misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix error `Directory tmp_fetch_XXX already exists` which could happen after a failed part fetch. Delete the temporary fetch directory if it already exists. Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)). * Fix MSan report for function `range` with `UInt256` argument (support for large integers is experimental). This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add `current_database` column to `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add case-insensitive history search/navigation and subword movement features to `clickhouse-client`. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)). -* Added possibility to migrate existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)). * If a tuple of NULLs, e.g. `(NULL, NULL)`, is on the left hand side of the `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))`, returns 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([alexey-milovidov](https://github.com/alexey-milovidov)).
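A sketch of the new behaviour:

```sql
SELECT (NULL, NULL) IN ((0, 0), (3, 1));
-- now returns 0; previously this threw an exception about incompatible types
```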
-* Convert `system.errors.stack_trace` from `String` into `Array(UInt64)` (This should decrease overhead for the errors collecting). [#22058](https://github.com/ClickHouse/ClickHouse/pull/22058) ([Azat Khuzhin](https://github.com/azat)). * Update used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)). * Added case-insensitive aliases for `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028). [#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)). * Add option `strict_increase` to `windowFunnel` function to calculate each event once (resolve [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir](https://github.com/vdimir)). * If partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to `system.parts_columns` table (this was an inconsistency with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Supported `replication_alter_partitions_sync=1` setting for moving partitions from helping table to destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([turbo jason](https://github.com/songenjie)). +* Supported `replication_alter_partitions_sync=1` setting in `clickhouse-copier` for moving partitions from the helping table to the destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([turbo jason](https://github.com/songenjie)). * Show path to data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([tavplubix](https://github.com/tavplubix)). -* Support `RANGE OFFSET` frame for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame. They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add profile event `HedgedRequestsChangeReplica`, change the read data timeout from seconds to milliseconds. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)).
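The new counter can be inspected like any other profile event (a hedged sketch):

```sql
SELECT value
FROM system.events
WHERE event = 'HedgedRequestsChangeReplica';
-- counts how many times a hedged request switched to another replica
```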
-* Add connection pool for PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)). * DiskS3 (experimental feature under development). Fixed bug with the impossibility to move directory if the destination is not empty and cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)). * Better formatting for `Array` and `Map` data types in Web UI. [#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support non-default table schema for postgres storage/table-function. Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support replicas priority for postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)). * Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)). * Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. Added `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. Miscellaneous fixes and improvements for `Replicated` database engine. [#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([tavplubix](https://github.com/tavplubix)). * If `PODArray` was instantiated with element size that is neither a fraction nor a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)). * Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes #21383. [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)). * Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)). -* `Age` and `Precision` in graphite rollup configs should increase from retention to retention. Now it's checked and the wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Improve `clickhouse-format` to not throw an exception when there are extra spaces or a comment after the last query, and to throw an exception early with a readable message when formatting `ASTInsertQuery` with data. [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasFL)). * Improve support of integer keys in data type `Map`. [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)).
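For instance (a sketch; `Map` was still gated behind `allow_experimental_map_type` in this release, and the table name is hypothetical):

```sql
SET allow_experimental_map_type = 1;

CREATE TABLE t_map (m Map(UInt64, String)) ENGINE = Memory;
INSERT INTO t_map VALUES (map(1, 'a', 2, 'b'));
SELECT m[2] FROM t_map;  -- 'b'
```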
+* MaterializeMySQL: attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)). * Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir](https://github.com/vdimir)). * Do not create empty parts on INSERT when the `optimize_on_insert` setting is enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)). * `MaterializeMySQL`: add minmax skipping index for `_version` column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)). -* Improve performance of aggregation in order of sorting key (with enabled setting `optimize_aggregation_in_order`). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)). -* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)). +* Add option `--backslash` for `clickhouse-format`, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasFL)). +* Now ClickHouse will not throw a `LOGICAL_ERROR` exception when we try to mutate an already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). #### Bug Fix -* Remove socket from epoll before cancelling packet receiver in `HedgedConnections` to prevent possible race. I hope it fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove socket from epoll before cancelling packet receiver in `HedgedConnections` to prevent possible race. Fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). * Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the result set contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed bug in S3 zero-copy replication for hybrid storage. [#22378](https://github.com/ClickHouse/ClickHouse/pull/22378) ([ianton-ru](https://github.com/ianton-ru)). -* Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)).
* Fix exception which may happen when `SELECT` has a constant `WHERE` condition and the source table has columns whose names are digits. [#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)). * Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). * Fix uncaught exception in `InterserverIOHTTPHandler`. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)). @@ -99,68 +99,55 @@ * Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir](https://github.com/vdimir)). * Fix the name of the background thread pool which is used to poll messages from Kafka. The Kafka engine with the broken thread pool would not consume messages from the message queue. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)). * Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). -* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)). +* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy Linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)). * Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). * The function `decrypt` was lacking a check for the minimal size of data encrypted in `AEAD` mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([alexey-milovidov](https://github.com/alexey-milovidov)). * In rare cases, a merge for `CollapsingMergeTree` may create a granule with `index_granularity + 1` rows. Because of this, an internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). * Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)). -* Fix reading the HTTP POST request with "multipart/form-data" content type. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)).
+* Fix reading the HTTP POST request with "multipart/form-data" content type in some cases. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)). * Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). * Fix incorrect query result (and possible crash) which could happen when `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Better error handling and logging in `WriteBufferFromS3`. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)). * Fix possible crashes in aggregate functions with combinator `Distinct` while using two-level aggregation. This is a follow-up fix of [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365). It could only be reproduced in a production environment. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). * Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896). [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)). -* Fix bug for `ReplicatedMerge` table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). -* Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug for `ReplicatedMerge` table engines when `ALTER MODIFY COLUMN` query doesn't change the type of `Decimal` column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Fix possible infinite waiting when concurrent `OPTIMIZE` and `DROP` are run for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). * Fix function `arrayElement` with type `Map` for constant integer arguments. [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)). * Fix SIGSEGV on not existing attributes from `ip_trie` with `access_to_key_from_attributes`. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)). * Server now starts accepting connections only after `DDLWorker` and dictionaries are initialized. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). -* Add type conversion for `StorageJoin` keys (previously led to SIGSEGV).
[#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)). +* Add type conversion for keys of tables of type `Join` (previously led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)). * Fix distributed requests cancellation (for example a simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). * Fix `fsync_part_directory` for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)). * Remove unknown columns from joined table in `WHERE` for queries to external database engines (MySQL, PostgreSQL). Closes [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), closes [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), closes [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir](https://github.com/vdimir)). * `std::terminate` was called if there is an error writing data into S3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir](https://github.com/vdimir)). -* Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible error `Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). * If a query has a constant `WHERE` condition and the setting `optimize_skip_unused_shards` is enabled, all shards could be skipped and the query could return an incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). -* Fix incorrect `fd_ready` assignment in NuKeeperTCPHandler. [#21544](https://github.com/ClickHouse/ClickHouse/pull/21544) ([小路](https://github.com/nicelulu)). * Fix table function `clusterAllReplicas` returning wrong `_shard_num`. Closes [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasFL)). * Fix that S3 table holds old credentials after config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). * Fixed race on SSL object inside `SecureSocket` in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Fix `Avro` format parsing for `Kafka`. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). * Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix official website documents which introduced cluster secret feature. [#21331](https://github.com/ClickHouse/ClickHouse/pull/21331) ([Chao Ma](https://github.com/godliness)). * `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)).
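A hedged sketch of how the flag is typically used (the path assumes a default installation; the view name is hypothetical):

```sql
-- The server suggests creating this flag when a drop exceeds
-- max_table_size_to_drop / max_partition_size_to_drop, e.g.:
--   touch /var/lib/clickhouse/flags/force_drop_table && chmod 666 /var/lib/clickhouse/flags/force_drop_table
DROP TABLE huge_materialized_view;  -- now also honoured for materialized views
```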
* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Closes [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)). * Fixed open behavior of remote host filter in the case when the `remote_url_allow_hosts` section is present in the configuration but has no entries. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). #### Build/Testing/Packaging Improvement -* Enable the bundled openldap on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Fix CMake error about internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Fix Fedora\RHEL\CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Fixed compiling on `ppc64le` and use the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). -* Added a way to check memory info for the RBAC testflows tests. [#22403](https://github.com/ClickHouse/ClickHouse/pull/22403) ([MyroTk](https://github.com/MyroTk)). -* Fix test with MaterializeMySQL. MySQL is started only once with MaterializeMySQL integration test. Fixes [#22289](https://github.com/ClickHouse/ClickHouse/issues/22289). [#22341](https://github.com/ClickHouse/ClickHouse/pull/22341) ([Winter Zhang](https://github.com/zhang2014)). +* Add [Jepsen](https://github.com/jepsen-io/jepsen) tests for ClickHouse Keeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). * Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)). * Enable status check for SQLancer CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). +* Multiple preparations for PowerPC builds: Enable the bundled openldap on `ppc64le`.
[#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix CMake error about internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix Fedora/RHEL/CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). Fixed compiling on `ppc64le` and use the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)). * Add `tzdata` to Docker containers because reading `ORC` formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Introduce 2 arguments for `clickhouse-server` image Dockerfile: `deb_location` & `single_binary_location`. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)). * Allow to use clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove decode method with python3. [#21832](https://github.com/ClickHouse/ClickHouse/pull/21832) ([kevin wan](https://github.com/MaxWk)). -* Add [jepsen](https://github.com/jepsen-io/jepsen) tests for NuKeeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). -* Updating TestFlows to 1.6.74. [#21673](https://github.com/ClickHouse/ClickHouse/pull/21673) ([vzakaznikov](https://github.com/vzakaznikov)). * Add llvm-12 binaries name to search in cmake scripts. Implicit constants conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in `readpassphrase` library. Deprecated `-fuse-ld` changed to `--ld-path` for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)). -* Updating `docker/test/testflows/runner/dockerd-entrypoint.sh` to use Yandex dockerhub-proxy. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)). -* Fixing LDAP authentication performance test by removing assertion. [#21507](https://github.com/ClickHouse/ClickHouse/pull/21507) ([vzakaznikov](https://github.com/vzakaznikov)). -* Added `ALL` and `NONE` privilege tests. 
[#21354](https://github.com/ClickHouse/ClickHouse/pull/21354) ([MyroTk](https://github.com/MyroTk)). +* Updating `docker/test/testflows/runner/dockerd-entrypoint.sh` to use Yandex dockerhub-proxy, because Docker Hub has enabled very restrictive rate limits. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)). * Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)). +* Add `ctime` option to `zookeeper-dump-tree`. It allows dumping node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)). + ## ClickHouse release 21.3 (LTS) From 48e688737a97d9f1e296cc0d256d2ffdc0833e38 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 8 Apr 2021 12:23:08 +0300 Subject: [PATCH 150/152] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8564e3acfc..26f6af86f55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,7 +137,7 @@ * Add [Jepsen](https://github.com/jepsen-io/jepsen) tests for ClickHouse Keeper. [#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). * Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)). -* Enable status check for SQLancer CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). +* Enable status check for [SQLancer](https://github.com/sqlancer/sqlancer) CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). * Multiple preparations for PowerPC builds: Enable the bundled openldap on `ppc64le`. [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable compiling on `ppc64le` with Clang. [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix compiling boost on `ppc64le`. [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix CMake error about internal CMake variable `CMAKE_ASM_COMPILE_OBJECT` not set on `ppc64le`. [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix Fedora/RHEL/CentOS not finding `libclang_rt.builtins` on `ppc64le`. [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). Enable building with `jemalloc` on `ppc64le`. [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). Fix ClickHouse's config embedding and cctz's timezone embedding on `ppc64le`. [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). Fixed compiling on `ppc64le` and use the correct instruction pointer register on `ppc64le`. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). * Re-enable the S3 (AWS) library on `aarch64`. [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)).
* Add `tzdata` to Docker containers because reading `ORC` formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([alexey-milovidov](https://github.com/alexey-milovidov)). From 298cd01d90a04a4e3526eeabf3e6b7e4e4d85997 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 8 Apr 2021 14:53:52 +0300 Subject: [PATCH 151/152] Update run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 4bd3fa717a2..626bedb453c 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -198,7 +198,7 @@ case "$stage" in # Lost connection to the server. This probably means that the server died # with abort. echo "failure" > status.txt - if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt + if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi From 480553b4a983c86bf1cadac6604e7bc9c37e20d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Apr 2021 16:27:49 +0300 Subject: [PATCH 152/152] Add adopters --- docs/en/introduction/adopters.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 5c526ac7260..3b3c1203b50 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -48,7 +48,8 @@ toc_title: Adopters | Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | | eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | -| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| EventBunker.io | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) | | FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | | Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | | FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |